{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using Theano backend.\n",
      "Using gpu device 1: GeForce GTX TITAN X (CNMeM is enabled with initial size: 80.0% of memory, cuDNN 5110)\n",
      "/home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/theano/sandbox/cuda/__init__.py:600: UserWarning: Your cuDNN version is more recent than the one Theano officially supports. If you see any problems, try updating Theano or downgrading cuDNN to version 5.\n",
      "  warnings.warn(warn)\n"
     ]
    }
   ],
   "source": [
    "import ast\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "import datetime\n",
    "\n",
    "from keras.layers import Input, Dense, Embedding, merge, Flatten, Merge, BatchNormalization\n",
    "from keras.models import Model, load_model\n",
    "from keras.regularizers import l2\n",
    "import keras.backend as K\n",
    "from keras.optimizers import SGD\n",
    "import numpy as np\n",
    "\n",
    "from sklearn.cluster import MeanShift, estimate_bandwidth\n",
    "\n",
    "import utils\n",
    "\n",
    "import data\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "from bcolz_array_iterator import BcolzArrayIterator\n",
    "\n",
    "import bcolz\n",
    "\n",
    "from keras_tqdm import TQDMNotebookCallback\n",
    "from keras.callbacks import ModelCheckpoint"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The path below points to a shared directory; replace it with the location of your own copy of the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "data_path = \"/data/datasets/taxi/\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## Replication of 'csv_to_hdf5.py'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Original repo used some bizarre tuple method of reading in data to save in a hdf5 file using fuel. The following does the same approach in that module, only using pandas and saving in a bcolz format (w/ training data as example)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "meta = pd.read_csv(data_path+'metaData_taxistandsID_name_GPSlocation.csv', header=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Descricao</th>\n",
       "      <th>Latitude</th>\n",
       "      <th>Longitude</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Agra</td>\n",
       "      <td>41.177146</td>\n",
       "      <td>-8.609670</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Alameda</td>\n",
       "      <td>41.156190</td>\n",
       "      <td>-8.591064</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Aldoar</td>\n",
       "      <td>41.170525</td>\n",
       "      <td>-8.665876</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Alfândega</td>\n",
       "      <td>41.143764</td>\n",
       "      <td>-8.621803</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Amial</td>\n",
       "      <td>41.183510</td>\n",
       "      <td>-8.612726</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   ID  Descricao   Latitude  Longitude\n",
       "0   1       Agra  41.177146  -8.609670\n",
       "1   2    Alameda  41.156190  -8.591064\n",
       "2   3     Aldoar  41.170525  -8.665876\n",
       "3   4  Alfândega  41.143764  -8.621803\n",
       "4   5      Amial  41.183510  -8.612726"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "meta.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "train = pd.read_csv(data_path+'train/train.csv', header=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>TRIP_ID</th>\n",
       "      <th>CALL_TYPE</th>\n",
       "      <th>ORIGIN_CALL</th>\n",
       "      <th>ORIGIN_STAND</th>\n",
       "      <th>TAXI_ID</th>\n",
       "      <th>TIMESTAMP</th>\n",
       "      <th>DAY_TYPE</th>\n",
       "      <th>MISSING_DATA</th>\n",
       "      <th>POLYLINE</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1372636858620000589</td>\n",
       "      <td>C</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20000589</td>\n",
       "      <td>1372636858</td>\n",
       "      <td>A</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.618643,41.141412],[-8.618499,41.141376],[...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1372637303620000596</td>\n",
       "      <td>B</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.0</td>\n",
       "      <td>20000596</td>\n",
       "      <td>1372637303</td>\n",
       "      <td>A</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.639847,41.159826],[-8.640351,41.159871],[...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1372636951620000320</td>\n",
       "      <td>C</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20000320</td>\n",
       "      <td>1372636951</td>\n",
       "      <td>A</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.612964,41.140359],[-8.613378,41.14035],[-...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1372636854620000520</td>\n",
       "      <td>C</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20000520</td>\n",
       "      <td>1372636854</td>\n",
       "      <td>A</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.574678,41.151951],[-8.574705,41.151942],[...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1372637091620000337</td>\n",
       "      <td>C</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20000337</td>\n",
       "      <td>1372637091</td>\n",
       "      <td>A</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.645994,41.18049],[-8.645949,41.180517],[-...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               TRIP_ID CALL_TYPE  ORIGIN_CALL  ORIGIN_STAND   TAXI_ID  \\\n",
       "0  1372636858620000589         C          NaN           NaN  20000589   \n",
       "1  1372637303620000596         B          NaN           7.0  20000596   \n",
       "2  1372636951620000320         C          NaN           NaN  20000320   \n",
       "3  1372636854620000520         C          NaN           NaN  20000520   \n",
       "4  1372637091620000337         C          NaN           NaN  20000337   \n",
       "\n",
       "    TIMESTAMP DAY_TYPE MISSING_DATA  \\\n",
       "0  1372636858        A        False   \n",
       "1  1372637303        A        False   \n",
       "2  1372636951        A        False   \n",
       "3  1372636854        A        False   \n",
       "4  1372637091        A        False   \n",
       "\n",
       "                                            POLYLINE  \n",
       "0  [[-8.618643,41.141412],[-8.618499,41.141376],[...  \n",
       "1  [[-8.639847,41.159826],[-8.640351,41.159871],[...  \n",
       "2  [[-8.612964,41.140359],[-8.613378,41.14035],[-...  \n",
       "3  [[-8.574678,41.151951],[-8.574705,41.151942],[...  \n",
       "4  [[-8.645994,41.18049],[-8.645949,41.180517],[-...  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "train['ORIGIN_CALL'] = pd.Series(pd.factorize(train['ORIGIN_CALL'])[0]) + 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "train['ORIGIN_STAND']=pd.Series([0 if pd.isnull(x) or x=='' else int(x) for x in train[\"ORIGIN_STAND\"]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "train['TAXI_ID'] = pd.Series(pd.factorize(train['TAXI_ID'])[0]) + 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "train['DAY_TYPE'] = pd.Series([ord(x[0]) - ord('A') for x in train['DAY_TYPE']])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "The array of long/lat coordinates per trip (row) is read in as a string. The function `ast.literal_eval(x)` evaluates the string into the expression it represents (safely). This happens below"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "polyline = pd.Series([ast.literal_eval(x) for x in train['POLYLINE']])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Split into latitude/longitude"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "train['LATITUDE'] = pd.Series([np.array([point[1] for point in poly],dtype=np.float32) for poly in polyline])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "train['LONGITUDE'] = pd.Series([np.array([point[0] for point in poly],dtype=np.float32) for poly in polyline])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# DataFrame.as_matrix() is deprecated since pandas 0.23 and removed in 1.0; .values yields the same 2-D array.\n",
    "utils.save_array(data_path+'train/train.bc', train.values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# DataFrame.as_matrix() is deprecated since pandas 0.23 and removed in 1.0; .values yields the same 2-D array.\n",
    "utils.save_array(data_path+'train/meta_train.bc', meta.values)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## Further Feature Engineering"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "After converting 'csv_to_hdf5.py' functionality to pandas, I saved that array and then simply constructed the rest of the features as specified in the paper using pandas. I didn't bother seeing how the author did it as it was extremely obtuse and involved the fuel module."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 424,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "train = pd.DataFrame(utils.load_array(data_path+'train/train.bc'), columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',\n",
    "       'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 425,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>TRIP_ID</th>\n",
       "      <th>CALL_TYPE</th>\n",
       "      <th>ORIGIN_CALL</th>\n",
       "      <th>ORIGIN_STAND</th>\n",
       "      <th>TAXI_ID</th>\n",
       "      <th>TIMESTAMP</th>\n",
       "      <th>DAY_TYPE</th>\n",
       "      <th>MISSING_DATA</th>\n",
       "      <th>POLYLINE</th>\n",
       "      <th>LATITUDE</th>\n",
       "      <th>LONGITUDE</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1372636858620000589</td>\n",
       "      <td>C</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1372636858</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.618643,41.141412],[-8.618499,41.141376],[...</td>\n",
       "      <td>[41.1414, 41.1414, 41.1425, 41.1438, 41.1444, ...</td>\n",
       "      <td>[-8.61864, -8.6185, -8.62033, -8.62215, -8.623...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1372637303620000596</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>1372637303</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.639847,41.159826],[-8.640351,41.159871],[...</td>\n",
       "      <td>[41.1598, 41.1599, 41.1601, 41.1605, 41.1609, ...</td>\n",
       "      <td>[-8.63985, -8.64035, -8.6422, -8.64445, -8.646...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1372636951620000320</td>\n",
       "      <td>C</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1372636951</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.612964,41.140359],[-8.613378,41.14035],[-...</td>\n",
       "      <td>[41.1404, 41.1404, 41.1403, 41.1404, 41.1404, ...</td>\n",
       "      <td>[-8.61296, -8.61338, -8.61421, -8.61477, -8.61...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1372636854620000520</td>\n",
       "      <td>C</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1372636854</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.574678,41.151951],[-8.574705,41.151942],[...</td>\n",
       "      <td>[41.152, 41.1519, 41.1519, 41.152, 41.1519, 41...</td>\n",
       "      <td>[-8.57468, -8.57471, -8.5747, -8.57466, -8.574...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1372637091620000337</td>\n",
       "      <td>C</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>1372637091</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.645994,41.18049],[-8.645949,41.180517],[-...</td>\n",
       "      <td>[41.1805, 41.1805, 41.18, 41.1789, 41.1785, 41...</td>\n",
       "      <td>[-8.64599, -8.64595, -8.64605, -8.6468, -8.649...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               TRIP_ID CALL_TYPE ORIGIN_CALL ORIGIN_STAND TAXI_ID   TIMESTAMP  \\\n",
       "0  1372636858620000589         C           0            0       1  1372636858   \n",
       "1  1372637303620000596         B           0            7       2  1372637303   \n",
       "2  1372636951620000320         C           0            0       3  1372636951   \n",
       "3  1372636854620000520         C           0            0       4  1372636854   \n",
       "4  1372637091620000337         C           0            0       5  1372637091   \n",
       "\n",
       "  DAY_TYPE MISSING_DATA                                           POLYLINE  \\\n",
       "0        0        False  [[-8.618643,41.141412],[-8.618499,41.141376],[...   \n",
       "1        0        False  [[-8.639847,41.159826],[-8.640351,41.159871],[...   \n",
       "2        0        False  [[-8.612964,41.140359],[-8.613378,41.14035],[-...   \n",
       "3        0        False  [[-8.574678,41.151951],[-8.574705,41.151942],[...   \n",
       "4        0        False  [[-8.645994,41.18049],[-8.645949,41.180517],[-...   \n",
       "\n",
       "                                            LATITUDE  \\\n",
       "0  [41.1414, 41.1414, 41.1425, 41.1438, 41.1444, ...   \n",
       "1  [41.1598, 41.1599, 41.1601, 41.1605, 41.1609, ...   \n",
       "2  [41.1404, 41.1404, 41.1403, 41.1404, 41.1404, ...   \n",
       "3  [41.152, 41.1519, 41.1519, 41.152, 41.1519, 41...   \n",
       "4  [41.1805, 41.1805, 41.18, 41.1789, 41.1785, 41...   \n",
       "\n",
       "                                           LONGITUDE  \n",
       "0  [-8.61864, -8.6185, -8.62033, -8.62215, -8.623...  \n",
       "1  [-8.63985, -8.64035, -8.6422, -8.64445, -8.646...  \n",
       "2  [-8.61296, -8.61338, -8.61421, -8.61477, -8.61...  \n",
       "3  [-8.57468, -8.57471, -8.5747, -8.57466, -8.574...  \n",
       "4  [-8.64599, -8.64595, -8.64605, -8.6468, -8.649...  "
      ]
     },
     "execution_count": 425,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "The paper discusses how many categorical variables there are per category. The following all check out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 426,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "57105"
      ]
     },
     "execution_count": 426,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train['ORIGIN_CALL'].max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 427,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "63"
      ]
     },
     "execution_count": 427,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train['ORIGIN_STAND'].max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 428,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "448"
      ]
     },
     "execution_count": 428,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train['TAXI_ID'].max()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Self-explanatory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 429,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "train['DAY_OF_WEEK'] = pd.Series([datetime.datetime.fromtimestamp(t).weekday() for t in train['TIMESTAMP']])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Quarter hour of the day, i.e. 1 of the `4*24 = 96` quarter hours of the day"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 430,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# Quarter-hour slot of the day (0-95): minutes since midnight, integer-divided by 15.\n",
    "# Each timestamp is converted once and reused for both the hour and the minute.\n",
    "train['QUARTER_HOUR'] = pd.Series([(ts.hour*60 + ts.minute)//15\n",
    "                                   for ts in map(datetime.datetime.fromtimestamp, train['TIMESTAMP'])])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Self-explanatory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 431,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "train['WEEK_OF_YEAR'] = pd.Series([datetime.datetime.fromtimestamp(t).isocalendar()[1] for t in train['TIMESTAMP']])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Target coords are the last in the sequence (final position). If there are no positions, or only 1, then mark as invalid w/ nan in order to drop later"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 433,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# Target = final [longitude, latitude] of the trip. Trips with fewer than two points\n",
    "# get NaN so that the later dropna() removes them.\n",
    "# Uses `np` (the alias imported at the top); the bare name `numpy` is never imported,\n",
    "# so `numpy.nan` raised NameError on a fresh kernel.\n",
    "train['TARGET'] = pd.Series([[lon[-1], lat[-1]] if len(lon) > 1 else np.nan\n",
    "                             for lon, lat in zip(train['LONGITUDE'], train['LATITUDE'])])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "This function creates the continuous inputs, which are the concatenated first k and last k coords in a sequence, as discussed in the paper. \n",
    "\n",
    "If there aren't at least `2*k` coords excluding the target, then the first k and last k overlap. In this case the sequence (excluding the target) is padded at the end with the last coord in the sequence. The paper mentioned they padded front and back but didn't specify in what manner.\n",
    "\n",
    "Also marks any invalid w/ na's"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 437,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "def start_stop_inputs(k, data=None):\n",
    "    \"\"\"Build the fixed-length coordinate feature vector for every trip.\n",
    "\n",
    "    The last point of each trip is the prediction target, so it is excluded.\n",
    "    From the remaining points the vector\n",
    "    [first k lon, last k lon, first k lat, last k lat] (4*k values) is built.\n",
    "    Trips with fewer than 2*k remaining points are first right-padded to 4*k\n",
    "    points by repeating their last remaining point.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    k : int\n",
    "        Number of leading and trailing points to keep (expected >= 1).\n",
    "    data : DataFrame, optional\n",
    "        Frame with 'LONGITUDE' and 'LATITUDE' columns holding one 1-D array per\n",
    "        row. Defaults to the notebook-level `train` frame.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pd.Series\n",
    "        Indexed like `data`; each entry is a 1-D array of length 4*k, or NaN\n",
    "        when the trip has fewer than two points (no input left once the target\n",
    "        is removed).\n",
    "    \"\"\"\n",
    "    if data is None:\n",
    "        data = train\n",
    "    result = []\n",
    "    for lon, lat in zip(data['LONGITUDE'], data['LATITUDE']):\n",
    "        if len(lon) < 2 or len(lat) < 2:\n",
    "            result.append(np.nan)\n",
    "            continue\n",
    "        # Drop the final point: it is the target, not an input.\n",
    "        lon, lat = lon[:-1], lat[:-1]\n",
    "        if len(lon) < 2*k:\n",
    "            # Pad width follows k (it used to be hard-coded to 20, i.e. only valid for k=5).\n",
    "            lon = np.pad(lon, (0, 4*k - len(lon)), mode='edge')\n",
    "            lat = np.pad(lat, (0, 4*k - len(lat)), mode='edge')\n",
    "        result.append(np.concatenate([lon[:k], lon[-k:], lat[:k], lat[-k:]]))\n",
    "    return pd.Series(result, index=data.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 438,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "train['COORD_FEATURES'] = start_stop_inputs(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 442,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1710670, 16)"
      ]
     },
     "execution_count": 442,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 441,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1674160, 16)"
      ]
     },
     "execution_count": 441,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.dropna().shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Drop na's"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 443,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "train = train.dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 446,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# DataFrame.as_matrix() is deprecated since pandas 0.23 and removed in 1.0; .values yields the same 2-D array.\n",
    "utils.save_array(data_path+'train/train_features.bc', train.values)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## End to end feature transformation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "train = pd.read_csv(data_path+'train/train.csv', header=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "test = pd.read_csv(data_path+'test/test.csv', header=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "def start_stop_inputs(k, data, test):\n",
    "    \"\"\"Build the fixed-length coordinate feature vector for every trip in `data`.\n",
    "\n",
    "    Each vector is [first k lon, last k lon, first k lat, last k lat] (4*k values).\n",
    "    Sequences shorter than 2*k points are first right-padded to 4*k points by\n",
    "    repeating their last point.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    k : int\n",
    "        Number of leading and trailing points to keep (expected >= 1).\n",
    "    data : DataFrame\n",
    "        Frame with 'LONGITUDE' and 'LATITUDE' columns holding one 1-D array per row.\n",
    "    test : bool\n",
    "        False: the last point of each trip is treated as the target and excluded,\n",
    "        and trips with fewer than two points yield NaN.\n",
    "        True: every point is used, and only empty trips yield NaN.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pd.Series\n",
    "        Indexed like `data`, so assigning it back as a column stays row-aligned even\n",
    "        when `data` does not have a 0..n-1 index. Entries are 1-D arrays of length\n",
    "        4*k, or NaN for unusable trips.\n",
    "    \"\"\"\n",
    "    min_points = 1 if test else 2\n",
    "    result = []\n",
    "    # zip over the two columns instead of iterrows(): no per-row Series is built and\n",
    "    # no integer-position lookups on a string-labelled row are needed.\n",
    "    for lon, lat in zip(data['LONGITUDE'], data['LATITUDE']):\n",
    "        if len(lon) < min_points or len(lat) < min_points:\n",
    "            result.append(np.nan)\n",
    "            continue\n",
    "        if not test:\n",
    "            # Training rows: the final point is the label, keep it out of the inputs.\n",
    "            lon, lat = lon[:-1], lat[:-1]\n",
    "        if len(lon) < 2*k:\n",
    "            lon = np.pad(lon, (0, 4*k - len(lon)), mode='edge')\n",
    "            lat = np.pad(lat, (0, 4*k - len(lat)), mode='edge')\n",
    "        result.append(np.concatenate([lon[:k], lon[-k:], lat[:k], lat[-k:]]))\n",
    "    return pd.Series(result, index=data.index)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Pre-calculated below on train set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "lat_mean = 41.15731\n",
    "lat_std = 0.074120656\n",
    "long_mean = -8.6161413\n",
    "long_std = 0.057200309"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "def feature_ext(data, test=False):\n",
    "    \"\"\"Turn a raw trips frame (train.csv / test.csv layout) into model features.\n",
    "\n",
    "    Adds LATITUDE / LONGITUDE (standardised per-trip coordinate arrays), TARGET\n",
    "    (training only: raw [lon, lat] of the final point), COORD_FEATURES, DAY_OF_WEEK,\n",
    "    QUARTER_HOUR and WEEK_OF_YEAR, integer-encodes ORIGIN_CALL, ORIGIN_STAND,\n",
    "    TAXI_ID and DAY_TYPE, and finally drops every row that contains a NaN.\n",
    "\n",
    "    Reads the notebook-level constants lat_mean, lat_std, long_mean and long_std\n",
    "    and calls the notebook-level start_stop_inputs(k, data, test).\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : DataFrame\n",
    "        Raw trips. The frame passed in is not modified; a copy is transformed.\n",
    "    test : bool, default False\n",
    "        True for the test set: no TARGET column is created and the full polyline\n",
    "        is used for COORD_FEATURES.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    DataFrame\n",
    "        The transformed copy with NaN rows removed.\n",
    "\n",
    "    NOTE(review): ORIGIN_CALL and TAXI_ID are factorized on whatever frame is passed\n",
    "    in, so the integer codes from separate calls (train vs. test) are not comparable.\n",
    "    NOTE(review): fromtimestamp() uses the machine's local timezone, so the time\n",
    "    features depend on where the notebook runs - confirm this is intended.\n",
    "    \"\"\"\n",
    "    # Copy first: the caller's frame stays untouched and the cell can be re-run safely.\n",
    "    data = data.copy()\n",
    "    # Every Series built below carries this index so that column assignment stays\n",
    "    # row-aligned even if `data` was filtered and no longer has a 0..n-1 index.\n",
    "    row_index = data.index\n",
    "\n",
    "    # factorize() codes missing values as -1, so after +1 the code 0 means 'missing'.\n",
    "    data['ORIGIN_CALL'] = pd.factorize(data['ORIGIN_CALL'])[0] + 1\n",
    "    data['ORIGIN_STAND'] = [0 if pd.isnull(x) or x == '' else int(x) for x in data['ORIGIN_STAND']]\n",
    "    data['TAXI_ID'] = pd.factorize(data['TAXI_ID'])[0] + 1\n",
    "    # First letter of DAY_TYPE mapped to its offset from 'A' (A -> 0, B -> 1, ...).\n",
    "    data['DAY_TYPE'] = [ord(x[0]) - ord('A') for x in data['DAY_TYPE']]\n",
    "\n",
    "    # POLYLINE is the text of a list of [lon, lat] pairs; literal_eval parses it safely.\n",
    "    polylines = [ast.literal_eval(x) for x in data['POLYLINE']]\n",
    "    lats = [np.array([point[1] for point in poly], dtype=np.float32) for poly in polylines]\n",
    "    lons = [np.array([point[0] for point in poly], dtype=np.float32) for poly in polylines]\n",
    "\n",
    "    # Stored coordinates are standardised with the pre-computed training statistics.\n",
    "    data['LATITUDE'] = pd.Series([(a - lat_mean)/lat_std for a in lats], index=row_index)\n",
    "    data['LONGITUDE'] = pd.Series([(a - long_mean)/long_std for a in lons], index=row_index)\n",
    "\n",
    "    if not test:\n",
    "        # The target is taken from the raw (un-standardised) coordinates; trips with\n",
    "        # fewer than two points get NaN and are removed by dropna() below.\n",
    "        data['TARGET'] = pd.Series([[lon[-1], lat[-1]] if len(lon) > 1 else np.nan\n",
    "                                    for lon, lat in zip(lons, lats)], index=row_index)\n",
    "\n",
    "    # .values makes the assignment positional, independent of the returned index.\n",
    "    data['COORD_FEATURES'] = start_stop_inputs(5, data, test).values\n",
    "\n",
    "    # Convert each timestamp once and derive all three calendar features from it.\n",
    "    stamps = [datetime.datetime.fromtimestamp(t) for t in data['TIMESTAMP']]\n",
    "    data['DAY_OF_WEEK'] = [s.weekday() for s in stamps]\n",
    "    data['QUARTER_HOUR'] = [(s.hour*60 + s.minute)//15 for s in stamps]\n",
    "    data['WEEK_OF_YEAR'] = [s.isocalendar()[1] for s in stamps]\n",
    "\n",
    "    return data.dropna()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "train = feature_ext(train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "test = feature_ext(test, test=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>TRIP_ID</th>\n",
       "      <th>CALL_TYPE</th>\n",
       "      <th>ORIGIN_CALL</th>\n",
       "      <th>ORIGIN_STAND</th>\n",
       "      <th>TAXI_ID</th>\n",
       "      <th>TIMESTAMP</th>\n",
       "      <th>DAY_TYPE</th>\n",
       "      <th>MISSING_DATA</th>\n",
       "      <th>POLYLINE</th>\n",
       "      <th>LATITUDE</th>\n",
       "      <th>LONGITUDE</th>\n",
       "      <th>COORD_FEATURES</th>\n",
       "      <th>DAY_OF_WEEK</th>\n",
       "      <th>QUARTER_HOUR</th>\n",
       "      <th>WEEK_OF_YEAR</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>T1</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>1</td>\n",
       "      <td>1408039037</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.585676,41.148522],[-8.585712,41.148639],[...</td>\n",
       "      <td>[-0.118578, -0.116982, -0.1141, -0.113122, -0....</td>\n",
       "      <td>[0.532604, 0.531971, 0.532454, 0.531671, 0.527...</td>\n",
       "      <td>[0.532604, 0.531971, 0.532454, 0.531671, 0.527...</td>\n",
       "      <td>3</td>\n",
       "      <td>43</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>T2</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>57</td>\n",
       "      <td>2</td>\n",
       "      <td>1408038611</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.610876,41.14557],[-8.610858,41.145579],[-...</td>\n",
       "      <td>[-0.158413, -0.158258, -0.155736, -0.150024, -...</td>\n",
       "      <td>[0.0920491, 0.0923659, 0.0915823, 0.0996017, 0...</td>\n",
       "      <td>[0.0920491, 0.0923659, 0.0915823, 0.0996017, 0...</td>\n",
       "      <td>3</td>\n",
       "      <td>43</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>T3</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>3</td>\n",
       "      <td>1408038568</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.585739,41.148558],[-8.58573,41.148828],[-...</td>\n",
       "      <td>[-0.118063, -0.11446, -0.112505, -0.111887, -0...</td>\n",
       "      <td>[0.531504, 0.531671, 0.531821, 0.5219, 0.52490...</td>\n",
       "      <td>[0.531504, 0.531671, 0.531821, 0.5219, 0.52490...</td>\n",
       "      <td>3</td>\n",
       "      <td>43</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>T4</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>53</td>\n",
       "      <td>4</td>\n",
       "      <td>1408039090</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.613963,41.141169],[-8.614125,41.141124],[...</td>\n",
       "      <td>[-0.217753, -0.21837, -0.221047, -0.222488, -0...</td>\n",
       "      <td>[0.0380801, 0.0352457, 0.0184065, 0.0151053, 0...</td>\n",
       "      <td>[0.0380801, 0.0352457, 0.0184065, 0.0151053, 0...</td>\n",
       "      <td>3</td>\n",
       "      <td>43</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>T5</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>18</td>\n",
       "      <td>5</td>\n",
       "      <td>1408039177</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.619903,41.148036],[-8.619894,41.148036]]</td>\n",
       "      <td>[-0.125114, -0.125114]</td>\n",
       "      <td>[-0.0657565, -0.0656064]</td>\n",
       "      <td>[-0.0657565, -0.0656064, -0.0656064, -0.065606...</td>\n",
       "      <td>3</td>\n",
       "      <td>43</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  TRIP_ID CALL_TYPE  ORIGIN_CALL  ORIGIN_STAND  TAXI_ID   TIMESTAMP  DAY_TYPE  \\\n",
       "0      T1         B            0            15        1  1408039037         0   \n",
       "1      T2         B            0            57        2  1408038611         0   \n",
       "2      T3         B            0            15        3  1408038568         0   \n",
       "3      T4         B            0            53        4  1408039090         0   \n",
       "4      T5         B            0            18        5  1408039177         0   \n",
       "\n",
       "  MISSING_DATA                                           POLYLINE  \\\n",
       "0        False  [[-8.585676,41.148522],[-8.585712,41.148639],[...   \n",
       "1        False  [[-8.610876,41.14557],[-8.610858,41.145579],[-...   \n",
       "2        False  [[-8.585739,41.148558],[-8.58573,41.148828],[-...   \n",
       "3        False  [[-8.613963,41.141169],[-8.614125,41.141124],[...   \n",
       "4        False      [[-8.619903,41.148036],[-8.619894,41.148036]]   \n",
       "\n",
       "                                            LATITUDE  \\\n",
       "0  [-0.118578, -0.116982, -0.1141, -0.113122, -0....   \n",
       "1  [-0.158413, -0.158258, -0.155736, -0.150024, -...   \n",
       "2  [-0.118063, -0.11446, -0.112505, -0.111887, -0...   \n",
       "3  [-0.217753, -0.21837, -0.221047, -0.222488, -0...   \n",
       "4                             [-0.125114, -0.125114]   \n",
       "\n",
       "                                           LONGITUDE  \\\n",
       "0  [0.532604, 0.531971, 0.532454, 0.531671, 0.527...   \n",
       "1  [0.0920491, 0.0923659, 0.0915823, 0.0996017, 0...   \n",
       "2  [0.531504, 0.531671, 0.531821, 0.5219, 0.52490...   \n",
       "3  [0.0380801, 0.0352457, 0.0184065, 0.0151053, 0...   \n",
       "4                           [-0.0657565, -0.0656064]   \n",
       "\n",
       "                                      COORD_FEATURES  DAY_OF_WEEK  \\\n",
       "0  [0.532604, 0.531971, 0.532454, 0.531671, 0.527...            3   \n",
       "1  [0.0920491, 0.0923659, 0.0915823, 0.0996017, 0...            3   \n",
       "2  [0.531504, 0.531671, 0.531821, 0.5219, 0.52490...            3   \n",
       "3  [0.0380801, 0.0352457, 0.0184065, 0.0151053, 0...            3   \n",
       "4  [-0.0657565, -0.0656064, -0.0656064, -0.065606...            3   \n",
       "\n",
       "   QUARTER_HOUR  WEEK_OF_YEAR  \n",
       "0            43            33  \n",
       "1            43            33  \n",
       "2            43            33  \n",
       "3            43            33  \n",
       "4            43            33  "
      ]
     },
     "execution_count": 161,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "utils.save_array(data_path+'train/train_features.bc', train.as_matrix())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "utils.save_array(data_path+'test/test_features.bc', test.as_matrix())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>TRIP_ID</th>\n",
       "      <th>CALL_TYPE</th>\n",
       "      <th>ORIGIN_CALL</th>\n",
       "      <th>ORIGIN_STAND</th>\n",
       "      <th>TAXI_ID</th>\n",
       "      <th>TIMESTAMP</th>\n",
       "      <th>DAY_TYPE</th>\n",
       "      <th>MISSING_DATA</th>\n",
       "      <th>POLYLINE</th>\n",
       "      <th>LATITUDE</th>\n",
       "      <th>LONGITUDE</th>\n",
       "      <th>TARGET</th>\n",
       "      <th>COORD_FEATURES</th>\n",
       "      <th>DAY_OF_WEEK</th>\n",
       "      <th>QUARTER_HOUR</th>\n",
       "      <th>WEEK_OF_YEAR</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1372636858620000589</td>\n",
       "      <td>C</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1372636858</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.618643,41.141412],[-8.618499,41.141376],[...</td>\n",
       "      <td>[-0.21451, -0.214974, -0.199688, -0.182087, -0...</td>\n",
       "      <td>[-0.0437321, -0.0412145, -0.0731591, -0.105104...</td>\n",
       "      <td>[-8.63084, 41.1545]</td>\n",
       "      <td>[-0.0437321, -0.0412145, -0.0731591, -0.105104...</td>\n",
       "      <td>6</td>\n",
       "      <td>68</td>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1372637303620000596</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>1372637303</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.639847,41.159826],[-8.640351,41.159871],[...</td>\n",
       "      <td>[0.0339161, 0.0345337, 0.0378275, 0.0429227, 0...</td>\n",
       "      <td>[-0.414429, -0.423249, -0.455494, -0.494991, -...</td>\n",
       "      <td>[-8.66574, 41.1707]</td>\n",
       "      <td>[-0.414429, -0.423249, -0.455494, -0.494991, -...</td>\n",
       "      <td>6</td>\n",
       "      <td>68</td>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1372636951620000320</td>\n",
       "      <td>C</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1372636951</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.612964,41.140359],[-8.613378,41.14035],[-...</td>\n",
       "      <td>[-0.228715, -0.228818, -0.229796, -0.228561, -...</td>\n",
       "      <td>[0.0555529, 0.048317, 0.0336785, 0.0239251, 0....</td>\n",
       "      <td>[-8.61597, 41.1405]</td>\n",
       "      <td>[0.0555529, 0.048317, 0.0336785, 0.0239251, 0....</td>\n",
       "      <td>6</td>\n",
       "      <td>68</td>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1372636854620000520</td>\n",
       "      <td>C</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1372636854</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.574678,41.151951],[-8.574705,41.151942],[...</td>\n",
       "      <td>[-0.0723098, -0.0724127, -0.0725671, -0.072206...</td>\n",
       "      <td>[0.724872, 0.724405, 0.724572, 0.725189, 0.724...</td>\n",
       "      <td>[-8.608, 41.1429]</td>\n",
       "      <td>[0.724872, 0.724405, 0.724572, 0.725189, 0.724...</td>\n",
       "      <td>6</td>\n",
       "      <td>68</td>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1372637091620000337</td>\n",
       "      <td>C</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>1372637091</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.645994,41.18049],[-8.645949,41.180517],[-...</td>\n",
       "      <td>[0.312708, 0.313068, 0.306789, 0.291092, 0.285...</td>\n",
       "      <td>[-0.5219, -0.521117, -0.522834, -0.536055, -0....</td>\n",
       "      <td>[-8.68727, 41.1781]</td>\n",
       "      <td>[-0.5219, -0.521117, -0.522834, -0.536055, -0....</td>\n",
       "      <td>6</td>\n",
       "      <td>68</td>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               TRIP_ID CALL_TYPE  ORIGIN_CALL  ORIGIN_STAND  TAXI_ID  \\\n",
       "0  1372636858620000589         C            0             0        1   \n",
       "1  1372637303620000596         B            0             7        2   \n",
       "2  1372636951620000320         C            0             0        3   \n",
       "3  1372636854620000520         C            0             0        4   \n",
       "4  1372637091620000337         C            0             0        5   \n",
       "\n",
       "    TIMESTAMP  DAY_TYPE MISSING_DATA  \\\n",
       "0  1372636858         0        False   \n",
       "1  1372637303         0        False   \n",
       "2  1372636951         0        False   \n",
       "3  1372636854         0        False   \n",
       "4  1372637091         0        False   \n",
       "\n",
       "                                            POLYLINE  \\\n",
       "0  [[-8.618643,41.141412],[-8.618499,41.141376],[...   \n",
       "1  [[-8.639847,41.159826],[-8.640351,41.159871],[...   \n",
       "2  [[-8.612964,41.140359],[-8.613378,41.14035],[-...   \n",
       "3  [[-8.574678,41.151951],[-8.574705,41.151942],[...   \n",
       "4  [[-8.645994,41.18049],[-8.645949,41.180517],[-...   \n",
       "\n",
       "                                            LATITUDE  \\\n",
       "0  [-0.21451, -0.214974, -0.199688, -0.182087, -0...   \n",
       "1  [0.0339161, 0.0345337, 0.0378275, 0.0429227, 0...   \n",
       "2  [-0.228715, -0.228818, -0.229796, -0.228561, -...   \n",
       "3  [-0.0723098, -0.0724127, -0.0725671, -0.072206...   \n",
       "4  [0.312708, 0.313068, 0.306789, 0.291092, 0.285...   \n",
       "\n",
       "                                           LONGITUDE               TARGET  \\\n",
       "0  [-0.0437321, -0.0412145, -0.0731591, -0.105104...  [-8.63084, 41.1545]   \n",
       "1  [-0.414429, -0.423249, -0.455494, -0.494991, -...  [-8.66574, 41.1707]   \n",
       "2  [0.0555529, 0.048317, 0.0336785, 0.0239251, 0....  [-8.61597, 41.1405]   \n",
       "3  [0.724872, 0.724405, 0.724572, 0.725189, 0.724...    [-8.608, 41.1429]   \n",
       "4  [-0.5219, -0.521117, -0.522834, -0.536055, -0....  [-8.68727, 41.1781]   \n",
       "\n",
       "                                      COORD_FEATURES  DAY_OF_WEEK  \\\n",
       "0  [-0.0437321, -0.0412145, -0.0731591, -0.105104...            6   \n",
       "1  [-0.414429, -0.423249, -0.455494, -0.494991, -...            6   \n",
       "2  [0.0555529, 0.048317, 0.0336785, 0.0239251, 0....            6   \n",
       "3  [0.724872, 0.724405, 0.724572, 0.725189, 0.724...            6   \n",
       "4  [-0.5219, -0.521117, -0.522834, -0.536055, -0....            6   \n",
       "\n",
       "   QUARTER_HOUR  WEEK_OF_YEAR  \n",
       "0            68            26  \n",
       "1            68            26  \n",
       "2            68            26  \n",
       "3            68            26  \n",
       "4            68            26  "
      ]
     },
     "execution_count": 164,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## MEANSHIFT"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Meanshift clustering as performed in the paper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "train = pd.DataFrame(utils.load_array(data_path+'train/train_features.bc'),columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',\n",
    "       'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE', 'DAY_OF_WEEK',\n",
    "                            'QUARTER_HOUR', \"WEEK_OF_YEAR\", \"TARGET\", \"COORD_FEATURES\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Clustering performed on the targets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 532,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "y_targ = np.vstack(train[\"TARGET\"].as_matrix())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 524,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "from sklearn.cluster import MeanShift, estimate_bandwidth"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Can use the commented out code for a estimate of bandwidth, which causes clustering to converge much quicker.\n",
    "\n",
    "This is not mentioned in the paper but is included in the code. In order to get results similar to the paper's,\n",
    "they manually chose the uncommented bandwidth"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 533,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "#bw = estimate_bandwidth(y_targ, quantile=.1, n_samples=1000)\n",
    "bw = 0.001"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "This takes some time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 545,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MeanShift(bandwidth=0.001, bin_seeding=True, cluster_all=True, min_bin_freq=5,\n",
       "     n_jobs=1, seeds=None)"
      ]
     },
     "execution_count": 545,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ms = MeanShift(bandwidth=bw, bin_seeding=True, min_bin_freq=5)\n",
    "ms.fit(y_targ)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 546,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "cluster_centers = ms.cluster_centers_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "This is very close to the number of clusters mentioned in the paper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 547,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3421, 2)"
      ]
     },
     "execution_count": 547,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cluster_centers.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 548,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "utils.save_array(data_path+\"cluster_centers_bw_001.bc\", cluster_centers)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Formatting Features for Bcolz iterator / garbage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train = pd.DataFrame(utils.load_array(data_path+'train/train_features.bc'),columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',\n",
    "       'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE', 'TARGET',\n",
    "                            'COORD_FEATURES', \"DAY_OF_WEEK\", \"QUARTER_HOUR\", \"WEEK_OF_YEAR\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "cluster_centers = utils.load_array(data_path+\"cluster_centers_bw_001.bc\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "long = np.array([c[0] for c in cluster_centers])\n",
    "lat = np.array([c[1] for c in cluster_centers])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X_train, X_val = train_test_split(train, test_size=0.2, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_features(data):\n",
    "    return [np.vstack(data['COORD_FEATURES'].as_matrix()), np.vstack(data['ORIGIN_CALL'].as_matrix()), \n",
    "           np.vstack(data['TAXI_ID'].as_matrix()), np.vstack(data['ORIGIN_STAND'].as_matrix()),\n",
    "           np.vstack(data['QUARTER_HOUR'].as_matrix()), np.vstack(data['DAY_OF_WEEK'].as_matrix()), \n",
    "           np.vstack(data['WEEK_OF_YEAR'].as_matrix()), np.array([long for i in range(0,data.shape[0])]),\n",
    "               np.array([lat for i in range(0,data.shape[0])])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_target(data):\n",
    "    return np.vstack(data[\"TARGET\"].as_matrix())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X_train_features = get_features(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X_train_target = get_target(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1339328, 20)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "utils.save_array(data_path+'train/X_train_features.bc', get_features(X_train))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## MODEL"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load training data and cluster centers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train = pd.DataFrame(utils.load_array(data_path+'train/train_features.bc'),columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',\n",
    "       'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE', 'TARGET',\n",
    "                            'COORD_FEATURES', \"DAY_OF_WEEK\", \"QUARTER_HOUR\", \"WEEK_OF_YEAR\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Validation cuts "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "cuts = [\n",
    "    1376503200, # 2013-08-14 18:00\n",
    "    1380616200, # 2013-10-01 08:30\n",
    "    1381167900, # 2013-10-07 17:45\n",
    "    1383364800, # 2013-11-02 04:00\n",
    "    1387722600  # 2013-12-22 14:30\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2013-08-14 11:00:00\n"
     ]
    }
   ],
   "source": [
    "print(datetime.datetime.fromtimestamp(1376503200))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1674160, 16)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "val_indices = []\n",
    "index = 0\n",
    "for index, row in train.iterrows():\n",
    "    time = row['TIMESTAMP']\n",
    "    latitude = row['LATITUDE']\n",
    "    for ts in cuts:\n",
    "        if time <= ts and time + 15 * (len(latitude) - 1) >= ts:\n",
    "            val_indices.append(index)\n",
    "            break\n",
    "    index += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "X_valid = train.iloc[val_indices]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>TRIP_ID</th>\n",
       "      <th>CALL_TYPE</th>\n",
       "      <th>ORIGIN_CALL</th>\n",
       "      <th>ORIGIN_STAND</th>\n",
       "      <th>TAXI_ID</th>\n",
       "      <th>TIMESTAMP</th>\n",
       "      <th>DAY_TYPE</th>\n",
       "      <th>MISSING_DATA</th>\n",
       "      <th>POLYLINE</th>\n",
       "      <th>LATITUDE</th>\n",
       "      <th>LONGITUDE</th>\n",
       "      <th>TARGET</th>\n",
       "      <th>COORD_FEATURES</th>\n",
       "      <th>DAY_OF_WEEK</th>\n",
       "      <th>QUARTER_HOUR</th>\n",
       "      <th>WEEK_OF_YEAR</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>200153</th>\n",
       "      <td>1376502576620000126</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>36</td>\n",
       "      <td>247</td>\n",
       "      <td>1376502576</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.649504,41.15421],[-8.649684,41.154201],[-...</td>\n",
       "      <td>[-0.0418419, -0.0419448, -0.0449813, -0.046422...</td>\n",
       "      <td>[-0.583255, -0.586407, -0.59711, -0.589074, -0...</td>\n",
       "      <td>[-8.61122, 41.1463]</td>\n",
       "      <td>[-0.583255, -0.586407, -0.59711, -0.589074, -0...</td>\n",
       "      <td>2</td>\n",
       "      <td>43</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>200186</th>\n",
       "      <td>1376503146620000161</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>35</td>\n",
       "      <td>19</td>\n",
       "      <td>1376503146</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.649621,41.167323],[-8.64963,41.167251],[-...</td>\n",
       "      <td>[0.135098, 0.134121, 0.126709, 0.125371, 0.124...</td>\n",
       "      <td>[-0.585306, -0.585456, -0.589241, -0.588774, -...</td>\n",
       "      <td>[-8.64504, 41.1586]</td>\n",
       "      <td>[-0.585306, -0.585456, -0.589241, -0.588774, -...</td>\n",
       "      <td>2</td>\n",
       "      <td>43</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>200200</th>\n",
       "      <td>1376502942620000500</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>428</td>\n",
       "      <td>1376502942</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.585694,41.148522],[-8.585712,41.148801],[...</td>\n",
       "      <td>[-0.118578, -0.114821, -0.112402, -0.116982, -...</td>\n",
       "      <td>[0.532287, 0.531971, 0.523018, 0.524735, 0.524...</td>\n",
       "      <td>[-8.61524, 41.1418]</td>\n",
       "      <td>[0.532287, 0.531971, 0.523018, 0.524735, 0.524...</td>\n",
       "      <td>2</td>\n",
       "      <td>43</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>200202</th>\n",
       "      <td>1376502604620000105</td>\n",
       "      <td>C</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>87</td>\n",
       "      <td>1376502604</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.61093,41.145498],[-8.610939,41.145516],[-...</td>\n",
       "      <td>[-0.15939, -0.159133, -0.153883, -0.145392, -0...</td>\n",
       "      <td>[0.0910987, 0.0909487, 0.093783, 0.108572, 0.1...</td>\n",
       "      <td>[-8.64832, 41.1648]</td>\n",
       "      <td>[0.0910987, 0.0909487, 0.093783, 0.108572, 0.1...</td>\n",
       "      <td>2</td>\n",
       "      <td>43</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>200227</th>\n",
       "      <td>1376502611620000022</td>\n",
       "      <td>C</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>304</td>\n",
       "      <td>1376502611</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.591301,41.162715],[-8.591004,41.162562],[...</td>\n",
       "      <td>[0.0729274, 0.0708687, 0.0587228, 0.0539879, 0...</td>\n",
       "      <td>[0.43427, 0.439455, 0.42735, 0.423566, 0.41539...</td>\n",
       "      <td>[-8.60977, 41.1512]</td>\n",
       "      <td>[0.43427, 0.439455, 0.42735, 0.423566, 0.41539...</td>\n",
       "      <td>2</td>\n",
       "      <td>43</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    TRIP_ID CALL_TYPE ORIGIN_CALL ORIGIN_STAND TAXI_ID  \\\n",
       "200153  1376502576620000126         B           0           36     247   \n",
       "200186  1376503146620000161         B           0           35      19   \n",
       "200200  1376502942620000500         B           0           15     428   \n",
       "200202  1376502604620000105         C           0            0      87   \n",
       "200227  1376502611620000022         C           0            0     304   \n",
       "\n",
       "         TIMESTAMP DAY_TYPE MISSING_DATA  \\\n",
       "200153  1376502576        0        False   \n",
       "200186  1376503146        0        False   \n",
       "200200  1376502942        0        False   \n",
       "200202  1376502604        0        False   \n",
       "200227  1376502611        0        False   \n",
       "\n",
       "                                                 POLYLINE  \\\n",
       "200153  [[-8.649504,41.15421],[-8.649684,41.154201],[-...   \n",
       "200186  [[-8.649621,41.167323],[-8.64963,41.167251],[-...   \n",
       "200200  [[-8.585694,41.148522],[-8.585712,41.148801],[...   \n",
       "200202  [[-8.61093,41.145498],[-8.610939,41.145516],[-...   \n",
       "200227  [[-8.591301,41.162715],[-8.591004,41.162562],[...   \n",
       "\n",
       "                                                 LATITUDE  \\\n",
       "200153  [-0.0418419, -0.0419448, -0.0449813, -0.046422...   \n",
       "200186  [0.135098, 0.134121, 0.126709, 0.125371, 0.124...   \n",
       "200200  [-0.118578, -0.114821, -0.112402, -0.116982, -...   \n",
       "200202  [-0.15939, -0.159133, -0.153883, -0.145392, -0...   \n",
       "200227  [0.0729274, 0.0708687, 0.0587228, 0.0539879, 0...   \n",
       "\n",
       "                                                LONGITUDE  \\\n",
       "200153  [-0.583255, -0.586407, -0.59711, -0.589074, -0...   \n",
       "200186  [-0.585306, -0.585456, -0.589241, -0.588774, -...   \n",
       "200200  [0.532287, 0.531971, 0.523018, 0.524735, 0.524...   \n",
       "200202  [0.0910987, 0.0909487, 0.093783, 0.108572, 0.1...   \n",
       "200227  [0.43427, 0.439455, 0.42735, 0.423566, 0.41539...   \n",
       "\n",
       "                     TARGET  \\\n",
       "200153  [-8.61122, 41.1463]   \n",
       "200186  [-8.64504, 41.1586]   \n",
       "200200  [-8.61524, 41.1418]   \n",
       "200202  [-8.64832, 41.1648]   \n",
       "200227  [-8.60977, 41.1512]   \n",
       "\n",
       "                                           COORD_FEATURES DAY_OF_WEEK  \\\n",
       "200153  [-0.583255, -0.586407, -0.59711, -0.589074, -0...           2   \n",
       "200186  [-0.585306, -0.585456, -0.589241, -0.588774, -...           2   \n",
       "200200  [0.532287, 0.531971, 0.523018, 0.524735, 0.524...           2   \n",
       "200202  [0.0910987, 0.0909487, 0.093783, 0.108572, 0.1...           2   \n",
       "200227  [0.43427, 0.439455, 0.42735, 0.423566, 0.41539...           2   \n",
       "\n",
       "       QUARTER_HOUR WEEK_OF_YEAR  \n",
       "200153           43           33  \n",
       "200186           43           33  \n",
       "200200           43           33  \n",
       "200202           43           33  \n",
       "200227           43           33  "
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "valid.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2013-08-14 10:49:36\n",
      "2013-08-14 10:59:06\n",
      "2013-08-14 10:55:42\n",
      "2013-08-14 10:50:04\n",
      "2013-08-14 10:50:11\n",
      "2013-08-14 10:56:57\n",
      "2013-08-14 10:36:51\n",
      "2013-08-14 10:44:15\n",
      "2013-08-14 10:55:50\n",
      "2013-08-14 10:50:35\n",
      "2013-08-14 10:50:27\n",
      "2013-08-14 10:43:57\n",
      "2013-08-14 10:16:48\n",
      "2013-08-14 10:40:47\n",
      "2013-08-14 10:45:55\n",
      "2013-08-14 10:43:00\n",
      "2013-08-14 10:53:22\n",
      "2013-08-14 10:50:03\n",
      "2013-08-14 10:26:22\n",
      "2013-08-14 10:59:15\n",
      "2013-08-14 10:50:17\n",
      "2013-08-14 10:56:34\n",
      "2013-08-14 10:53:42\n",
      "2013-08-14 10:47:46\n",
      "2013-08-14 10:58:46\n",
      "2013-08-14 10:24:23\n",
      "2013-08-14 10:55:19\n",
      "2013-08-14 10:57:03\n",
      "2013-08-14 10:56:11\n",
      "2013-08-14 10:56:52\n",
      "2013-08-14 10:57:57\n",
      "2013-08-14 10:08:15\n",
      "2013-08-14 10:51:14\n",
      "2013-08-14 10:58:31\n",
      "2013-08-14 10:47:31\n",
      "2013-08-14 10:30:36\n",
      "2013-08-14 10:17:59\n",
      "2013-08-14 10:48:03\n",
      "2013-08-14 10:55:52\n",
      "2013-08-14 10:49:06\n",
      "2013-08-14 10:58:55\n",
      "2013-08-14 10:51:24\n",
      "2013-08-14 10:54:12\n",
      "2013-08-14 10:54:26\n",
      "2013-08-14 10:51:18\n",
      "2013-08-14 10:59:56\n",
      "2013-08-14 10:48:31\n",
      "2013-08-14 10:51:56\n",
      "2013-08-14 10:39:22\n",
      "2013-08-14 10:57:25\n",
      "2013-08-14 10:57:28\n",
      "2013-08-14 10:57:40\n",
      "2013-08-14 10:39:01\n",
      "2013-08-14 10:50:39\n",
      "2013-08-14 09:48:19\n",
      "2013-10-01 01:16:12\n",
      "2013-10-01 01:28:04\n",
      "2013-10-01 01:18:37\n",
      "2013-10-01 01:24:48\n",
      "2013-10-01 01:23:39\n",
      "2013-10-01 01:28:37\n",
      "2013-10-01 01:20:16\n",
      "2013-10-01 01:23:49\n",
      "2013-10-01 01:27:11\n",
      "2013-10-01 01:06:20\n",
      "2013-10-01 01:28:08\n",
      "2013-10-01 01:29:02\n",
      "2013-10-01 01:24:44\n",
      "2013-10-01 01:24:44\n",
      "2013-10-01 01:19:06\n",
      "2013-10-01 00:28:33\n",
      "2013-10-01 01:29:28\n",
      "2013-10-01 01:27:31\n",
      "2013-10-01 01:22:13\n",
      "2013-10-01 01:26:03\n",
      "2013-10-01 01:28:55\n",
      "2013-10-01 01:18:10\n",
      "2013-10-01 01:22:13\n",
      "2013-10-01 01:14:30\n",
      "2013-10-01 01:24:41\n",
      "2013-10-01 01:22:16\n",
      "2013-10-01 01:25:35\n",
      "2013-10-01 01:21:27\n",
      "2013-10-01 01:11:33\n",
      "2013-10-01 01:10:18\n",
      "2013-10-01 01:09:33\n",
      "2013-10-01 01:01:15\n",
      "2013-10-01 01:17:58\n",
      "2013-10-01 01:18:00\n",
      "2013-10-01 01:13:26\n",
      "2013-10-01 01:18:01\n",
      "2013-10-01 01:25:54\n",
      "2013-10-01 01:21:20\n",
      "2013-10-01 01:25:31\n",
      "2013-10-01 01:25:54\n",
      "2013-10-01 01:23:40\n",
      "2013-10-01 01:26:46\n",
      "2013-10-01 01:23:31\n",
      "2013-10-01 01:17:09\n",
      "2013-10-01 01:21:57\n",
      "2013-10-01 00:29:09\n",
      "2013-10-01 01:14:47\n",
      "2013-10-01 01:04:25\n",
      "2013-10-01 01:14:09\n",
      "2013-10-01 01:16:59\n",
      "2013-10-01 01:27:16\n",
      "2013-10-01 01:16:26\n",
      "2013-10-01 01:23:18\n",
      "2013-10-01 01:16:05\n",
      "2013-10-01 01:27:43\n",
      "2013-10-01 01:08:13\n",
      "2013-10-01 01:19:21\n",
      "2013-10-01 01:21:19\n",
      "2013-10-01 01:24:20\n",
      "2013-10-01 01:26:45\n",
      "2013-10-01 01:18:28\n",
      "2013-10-01 01:19:45\n",
      "2013-10-01 01:28:10\n",
      "2013-10-01 01:22:20\n",
      "2013-10-01 01:18:42\n",
      "2013-10-01 01:19:52\n",
      "2013-10-01 01:18:44\n",
      "2013-10-01 01:15:11\n",
      "2013-10-01 01:19:24\n",
      "2013-10-01 01:23:58\n",
      "2013-10-01 01:28:50\n",
      "2013-10-01 01:13:24\n",
      "2013-10-01 01:28:38\n",
      "2013-10-01 01:24:50\n",
      "2013-10-01 01:14:19\n",
      "2013-10-01 01:10:05\n",
      "2013-10-01 01:26:31\n",
      "2013-10-01 01:28:01\n",
      "2013-09-30 23:44:16\n",
      "2013-10-01 01:21:43\n",
      "2013-10-01 01:26:57\n",
      "2013-10-01 01:25:25\n",
      "2013-10-01 01:25:36\n",
      "2013-10-01 01:16:34\n",
      "2013-10-01 01:26:40\n",
      "2013-10-01 01:14:56\n",
      "2013-10-01 01:13:10\n",
      "2013-10-01 01:28:34\n",
      "2013-10-01 01:19:08\n",
      "2013-10-01 01:24:57\n",
      "2013-10-01 00:52:43\n",
      "2013-10-01 01:25:28\n",
      "2013-10-01 01:22:54\n",
      "2013-10-01 01:28:49\n",
      "2013-10-01 00:13:25\n",
      "2013-10-07 10:34:47\n",
      "2013-10-07 10:38:08\n",
      "2013-10-07 10:31:10\n",
      "2013-10-07 10:35:12\n",
      "2013-10-07 10:41:50\n",
      "2013-10-07 10:34:31\n",
      "2013-10-07 10:42:02\n",
      "2013-10-07 10:39:05\n",
      "2013-10-07 10:31:43\n",
      "2013-10-07 10:34:27\n",
      "2013-10-07 10:31:48\n",
      "2013-10-07 10:42:24\n",
      "2013-10-07 10:38:37\n",
      "2013-10-07 10:29:02\n",
      "2013-10-07 10:33:55\n",
      "2013-10-07 10:17:07\n",
      "2013-10-07 10:44:31\n",
      "2013-10-07 10:42:52\n",
      "2013-10-07 10:26:05\n",
      "2013-10-07 10:34:07\n",
      "2013-10-07 10:40:59\n",
      "2013-10-07 10:41:36\n",
      "2013-10-07 10:33:47\n",
      "2013-10-07 10:30:59\n",
      "2013-10-07 10:38:59\n",
      "2013-10-07 10:28:56\n",
      "2013-10-07 10:41:24\n",
      "2013-10-07 10:41:49\n",
      "2013-10-07 10:42:47\n",
      "2013-10-07 10:34:09\n",
      "2013-10-07 10:40:31\n",
      "2013-10-07 10:21:34\n",
      "2013-10-07 10:43:52\n",
      "2013-10-07 10:18:11\n",
      "2013-10-07 10:41:47\n",
      "2013-10-07 10:33:04\n",
      "2013-10-07 10:40:53\n",
      "2013-10-07 10:36:38\n",
      "2013-10-07 10:41:46\n",
      "2013-10-07 10:03:36\n",
      "2013-10-07 10:44:45\n",
      "2013-10-07 10:21:42\n",
      "2013-10-07 10:24:07\n",
      "2013-10-07 10:40:35\n",
      "2013-10-07 10:41:00\n",
      "2013-10-07 10:43:10\n",
      "2013-10-07 10:23:55\n",
      "2013-10-07 10:43:30\n",
      "2013-10-07 10:25:24\n",
      "2013-10-07 10:35:07\n",
      "2013-10-07 10:43:33\n",
      "2013-10-07 10:39:30\n",
      "2013-10-07 10:31:42\n",
      "2013-10-07 10:39:17\n",
      "2013-10-07 10:42:47\n",
      "2013-10-07 10:39:20\n",
      "2013-10-07 10:44:41\n",
      "2013-10-07 10:24:22\n",
      "2013-10-07 10:12:39\n",
      "2013-10-07 10:37:25\n",
      "2013-10-07 10:42:55\n",
      "2013-10-07 10:14:35\n",
      "2013-10-07 10:37:12\n",
      "2013-10-07 10:32:29\n",
      "2013-10-07 10:42:37\n",
      "2013-10-07 10:26:52\n",
      "2013-10-07 10:31:19\n",
      "2013-10-07 10:44:58\n",
      "2013-11-01 20:47:37\n",
      "2013-11-01 20:54:00\n",
      "2013-11-01 20:58:53\n",
      "2013-11-01 20:56:37\n",
      "2013-11-01 20:56:09\n",
      "2013-11-01 20:51:05\n",
      "2013-11-01 20:50:58\n",
      "2013-11-01 20:55:26\n",
      "2013-11-01 20:53:43\n",
      "2013-11-01 20:53:46\n",
      "2013-11-01 20:54:55\n",
      "2013-11-01 20:59:28\n",
      "2013-11-01 20:56:54\n",
      "2013-11-01 20:50:37\n",
      "2013-11-01 20:48:40\n",
      "2013-11-01 20:55:46\n",
      "2013-11-01 20:45:20\n",
      "2013-11-01 20:46:22\n",
      "2013-11-01 20:48:25\n",
      "2013-11-01 20:47:19\n",
      "2013-11-01 20:57:31\n",
      "2013-11-01 20:58:14\n",
      "2013-11-01 20:49:30\n",
      "2013-11-01 20:43:31\n",
      "2013-11-01 20:59:00\n",
      "2013-11-01 20:54:23\n",
      "2013-11-01 20:51:01\n",
      "2013-11-01 20:38:12\n",
      "2013-11-01 20:59:31\n",
      "2013-11-01 20:56:46\n",
      "2013-11-01 20:53:51\n",
      "2013-11-01 20:48:00\n",
      "2013-11-01 20:58:04\n",
      "2013-11-01 20:52:50\n",
      "2013-11-01 20:58:12\n",
      "2013-11-01 20:57:37\n",
      "2013-11-01 20:53:33\n",
      "2013-11-01 20:54:11\n",
      "2013-11-01 20:48:49\n",
      "2013-11-01 20:42:56\n",
      "2013-11-01 20:55:36\n",
      "2013-11-01 20:51:36\n",
      "2013-11-01 20:48:45\n",
      "2013-11-01 20:49:17\n",
      "2013-11-01 20:53:50\n",
      "2013-11-01 20:45:28\n",
      "2013-11-01 20:45:04\n",
      "2013-11-01 20:52:17\n",
      "2013-11-01 20:52:10\n",
      "2013-11-01 20:59:16\n",
      "2013-11-01 20:51:37\n",
      "2013-11-01 20:50:10\n",
      "2013-12-22 06:24:50\n",
      "2013-12-22 06:04:12\n",
      "2013-12-22 06:16:27\n",
      "2013-12-22 06:23:06\n",
      "2013-12-22 06:24:04\n",
      "2013-12-22 06:17:33\n",
      "2013-12-22 06:22:55\n",
      "2013-12-22 06:24:35\n",
      "2013-12-22 06:21:56\n",
      "2013-12-22 06:22:49\n",
      "2013-12-22 06:25:31\n",
      "2013-12-22 06:21:31\n",
      "2013-12-22 06:27:31\n",
      "2013-12-22 06:29:45\n",
      "2013-12-22 06:26:09\n",
      "2013-12-22 06:17:08\n",
      "2013-12-22 06:26:00\n",
      "2013-12-22 06:20:56\n",
      "2013-12-22 06:23:09\n",
      "2013-12-22 06:22:31\n",
      "2013-12-22 06:29:59\n",
      "2013-12-22 06:27:43\n",
      "2013-12-22 06:23:04\n",
      "2013-12-22 06:25:30\n",
      "2013-12-22 06:19:16\n",
      "2013-12-22 06:23:06\n",
      "2013-12-22 06:26:01\n",
      "2013-12-22 06:19:45\n",
      "2013-12-22 02:34:23\n",
      "2013-12-22 06:29:54\n",
      "2013-12-22 06:28:39\n",
      "2013-12-22 06:27:43\n",
      "2013-12-22 06:16:23\n",
      "2013-12-22 06:17:26\n"
     ]
    }
   ],
   "source": [
    "for d in valid['TIMESTAMP']:\n",
    "    print(datetime.datetime.fromtimestamp(d))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Drop the validation rows from the training frame. `val_indices` is used\n",
    "# directly as the positional indexer: wrapping it in another list makes it a\n",
    "# 2-D index on current NumPy/pandas.\n",
    "X_train = train.drop(train.index[val_indices])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "cluster_centers = utils.load_array(data_path+\"/data/cluster_centers_bw_001.bc\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "long = np.array([c[0] for c in cluster_centers])\n",
    "lat = np.array([c[1] for c in cluster_centers])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "utils.save_array(data_path+'train/X_train.bc', X_train.as_matrix())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "utils.save_array(data_path+'valid/X_val.bc', X_valid.as_matrix())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "X_train = pd.DataFrame(utils.load_array(data_path+'train/X_train.bc'),columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',\n",
    "       'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE', 'TARGET',\n",
    "                            'COORD_FEATURES', \"DAY_OF_WEEK\", \"QUARTER_HOUR\", \"WEEK_OF_YEAR\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X_val = pd.DataFrame(utils.load_array(data_path+'valid/X_val.bc'),columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',\n",
    "       'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE', 'TARGET',\n",
    "                            'COORD_FEATURES', \"DAY_OF_WEEK\", \"QUARTER_HOUR\", \"WEEK_OF_YEAR\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The equirectangular loss function mentioned in the paper.\n",
    "\n",
    "Note: Very important that y[0] is longitude and y[1] is latitude.\n",
    "\n",
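    "The radius of the earth constant \"R\" is a positive scale factor, so it does not affect minimization, and its units were not given in the paper. The implementation below multiplies by R = 6371 (the Earth's mean radius in km)."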
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def equirectangular_loss(y_true, y_pred):\n",
    "    \"\"\"Equirectangular distance between true and predicted coordinates.\n",
    "\n",
    "    Column 0 of each tensor is read as longitude and column 1 as latitude,\n",
    "    both in degrees. Returns one value per row, scaled by 6371 (the Earth's\n",
    "    mean radius in km).\n",
    "    \"\"\"\n",
    "    rad_per_deg = np.pi / 180\n",
    "    true_long, true_lat = (y_true[:, col] * rad_per_deg for col in (0, 1))\n",
    "    pred_long, pred_lat = (y_pred[:, col] * rad_per_deg for col in (0, 1))\n",
    "\n",
    "    # Longitude differences shrink with the cosine of the mean latitude.\n",
    "    east_west = (true_long - pred_long) * K.cos((true_lat + pred_lat) / 2.)\n",
    "    north_south = true_lat - pred_lat\n",
    "    return 6371 * K.sqrt(K.square(east_west) + K.square(north_south))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def embedding_input(name, n_in, n_out, reg):\n",
    "    \"\"\"Create a named single-integer input and its embedding.\n",
    "\n",
    "    name  -- name given to the Input layer\n",
    "    n_in  -- embedding vocabulary size\n",
    "    n_out -- embedding width\n",
    "    reg   -- l2 weight applied to the embedding matrix\n",
    "\n",
    "    Returns the pair (input tensor, embedded tensor).\n",
    "    \"\"\"\n",
    "    category_input = Input(shape=(1,), dtype='int64', name=name)\n",
    "    embedding_layer = Embedding(n_in, n_out, input_length=1, W_regularizer=l2(reg))\n",
    "    return category_input, embedding_layer(category_input)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The following returns a fully-connected model as mentioned in the paper. Takes as input k as defined before, and the cluster centers.\n",
    "\n",
    "Inputs: Embeddings for each category, concatenated w/ the 4*k continuous variables representing the first/last k coords as mentioned above.\n",
    "\n",
    "Embeddings have no regularization, as it was not mentioned in paper, though are easily equipped to include.\n",
    "\n",
    "Paper mentions global normalization. Didn't specify exactly how they did that, whether they did it sequentially or whatnot. I just included a batchnorm layer for the continuous inputs. (Note: the `taxi_mlp` definition below does not currently contain a `BatchNormalization` layer.)\n",
    "\n",
    "After concatenation, 1 hidden layer of 500 neurons as called for in paper.\n",
    "\n",
    "Finally, output layer has as many outputs as there are cluster centers, w/ a softmax activation. Call this output P.\n",
    "\n",
    "The prediction is the weighted sum of each cluster center c_i w/ corresponding predicted prob P_i.\n",
    "\n",
    "To facilitate this, dotted output w/ cluster latitudes and longitudes separately. (this happens at variable y), then concatenated \n",
    "    into single tensor.\n",
    "    \n",
    "NOTE!!: You will see that I have the cluster center coords as inputs. Ideally, this function should store the cluster longs/lats as constants to be used in the model, but I could not figure out how to do that. As a consequence, I pass them in as a repeated input."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def taxi_mlp(k, cluster_centers):\n",
    "    \"\"\"Build the fully-connected destination model.\n",
    "\n",
    "    k               -- the numeric input has width 4*k\n",
    "    cluster_centers -- array whose first dimension is the number of centers\n",
    "\n",
    "    Inputs, in order: the numeric features, one integer input per categorical\n",
    "    feature, then the cluster longitudes and latitudes (fed as data). The\n",
    "    output concatenates the softmax-weighted longitude and latitude.\n",
    "    No BatchNormalization layer is applied to the numeric input here.\n",
    "    \"\"\"\n",
    "    n_centers = cluster_centers.shape[0]\n",
    "    nums = Input(shape=(4*k,))\n",
    "    center_longs = Input(shape=(n_centers,))\n",
    "    center_lats = Input(shape=(n_centers,))\n",
    "\n",
    "    # (input name, number of categories) for every categorical feature\n",
    "    categorical_specs = [('client_ID', 57106), ('taxi_ID', 448), ('stand_ID', 64),\n",
    "                         ('quarter_hour', 96), ('day_of_week', 7), ('week_of_year', 52)]\n",
    "    emb_width = 10\n",
    "    emb_reg = 0\n",
    "\n",
    "    emb_inputs, emb_flat = [], []\n",
    "    for emb_name, n_categories in categorical_specs:\n",
    "        # One extra embedding row on top of the listed category count.\n",
    "        emb_in, emb_out = embedding_input(emb_name, n_categories + 1, emb_width, emb_reg)\n",
    "        emb_inputs.append(emb_in)\n",
    "        emb_flat.append(Flatten()(emb_out))\n",
    "\n",
    "    hidden = merge([nums] + emb_flat, mode='concat')\n",
    "    hidden = Dense(500, activation='relu')(hidden)\n",
    "    center_probs = Dense(n_centers, activation='softmax')(hidden)\n",
    "\n",
    "    # Prediction = probability-weighted sum of the cluster coordinates.\n",
    "    pred_long = merge([center_probs, center_longs], mode='dot')\n",
    "    pred_lat = merge([center_probs, center_lats], mode='dot')\n",
    "    y = merge([pred_long, pred_lat], mode='concat')\n",
    "\n",
    "    return Model(input=[nums] + emb_inputs + [center_longs, center_lats], output=y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As mentioned, construction of repeated cluster longs/lats for input"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Iterator for in memory `train` pandas dataframe. I did this as opposed to bcolz iterator due to the pre-processing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def data_iter(data, batch_size, cluster_centers):\n",
    "    long = [c[0] for c in cluster_centers]\n",
    "    lat = [c[1] for c in cluster_centers]\n",
    "    i = 0\n",
    "    N = data.shape[0]\n",
    "    while True:\n",
    "        yield ([np.vstack(data['COORD_FEATURES'][i:i+batch_size].as_matrix()), np.vstack(data['ORIGIN_CALL'][i:i+batch_size].as_matrix()), \n",
    "           np.vstack(data['TAXI_ID'][i:i+batch_size].as_matrix()), np.vstack(data['ORIGIN_STAND'][i:i+batch_size].as_matrix()),\n",
    "           np.vstack(data['QUARTER_HOUR'][i:i+batch_size].as_matrix()), np.vstack(data['DAY_OF_WEEK'][i:i+batch_size].as_matrix()), \n",
    "           np.vstack(data['WEEK_OF_YEAR'][i:i+batch_size].as_matrix()), np.array([long for i in range(0,batch_size)]),\n",
    "               np.array([lat for i in range(0,batch_size)])], np.vstack(data[\"TARGET\"][i:i+batch_size].as_matrix()))\n",
    "        i += batch_size\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "x=Lambda(thing)([x,long,lat])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Of course, k in the model needs to match k from feature construction. We again use 5 as they did in the paper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "model = taxi_mlp(5, cluster_centers)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Paper used SGD opt w/ the following parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "model.compile(optimizer=SGD(0.01, momentum=0.9), loss=equirectangular_loss, metrics=['mse'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "X_train_feat = get_features(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X_train_target = get_target(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "X_val_feat = get_features(X_valid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X_val_target = get_target(X_valid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "tqdm = TQDMNotebookCallback()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "checkpoint = ModelCheckpoint(filepath=data_path+'models/tmp/weights.{epoch:03d}.{val_loss:.8f}.hdf5', save_best_only=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "batch_size=256"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "### original"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\r",
      "          \r",
      "5272/|/[loss: 0.469, mean_squared_error: 0.000] 100%|| 5272/5273 [01:54<00:00, 47.14it/s]"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7fb2bb8a19e8>"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit(X_train_feat, X_train_target, nb_epoch=1, batch_size=batch_size, validation_data=(X_val_feat, X_val_target), callbacks=[tqdm, checkpoint], verbose=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5272/|/[loss: 0.107, mean_squared_error: 0.000] 100%|| 5272/5273 [01:54<00:00, 49.65it/s]"
     ]
    }
   ],
   "source": [
    "model.fit(X_train_feat, X_train_target, nb_epoch=30, batch_size=batch_size, validation_data=(X_val_feat, X_val_target), callbacks=[tqdm, checkpoint], verbose=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "model = load_model(data_path+'models/weights.0.0799.hdf5', custom_objects={'equirectangular_loss':equirectangular_loss})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "5231/|/[loss: 0.074, mean_squared_error: 0.000] 100%|| 5231/5232 [01:58<00:00, 50.19it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7fced25954a8>"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit(X_train_feat, X_train_target, nb_epoch=100, batch_size=batch_size, validation_data=(X_val_feat, X_val_target), callbacks=[tqdm, checkpoint], verbose=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "model.save(data_path+'models/current_model.hdf5')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### new valid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7f82d815c550>"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit(X_train_feat, X_train_target, nb_epoch=1, batch_size=batch_size, validation_data=(X_val_feat, X_val_target), callbacks=[tqdm, checkpoint], verbose=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "model.fit(X_train_feat, X_train_target, nb_epoch=400, batch_size=batch_size, validation_data=(X_val_feat, X_val_target), callbacks=[tqdm, checkpoint], verbose=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "model.save(data_path+'/models/current_model.hdf5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "304"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(X_val_feat[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It works, but it seems to converge unrealistically quickly and the loss values are not the same as the paper's. The paper does not mention what it is using as \"error\" in its results. I assume the same equirectangular? Not very clear. The difference in values could be due to the missing Earth-radius factor"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Kaggle Entry"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "best_model = load_model(data_path+'models/weights.308.0.03373993.hdf5', custom_objects={'equirectangular_loss':equirectangular_loss})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\r",
      " 32/304 [==>...........................] - ETA: 0s"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[0.033743755401749363, 2.5798687967213293e-07]"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "best_model.evaluate(X_val_feat, X_val_target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "test = pd.DataFrame(utils.load_array(data_path+'test/test_features.bc'),columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',\n",
    "       'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE',\n",
    "                            'COORD_FEATURES', \"DAY_OF_WEEK\", \"QUARTER_HOUR\", \"WEEK_OF_YEAR\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "test['ORIGIN_CALL'] = pd.read_csv(data_path+'real_origin_call.csv', header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "test['TAXI_ID'] = pd.read_csv(data_path+'real_taxi_id.csv',header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "X_test = get_features(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "b = np.sort(X_test[1],axis=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "test_preds = np.round(best_model.predict(X_test), decimals=6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "d = {0:test['TRIP_ID'], 1:test_preds[:,1], 2:test_preds[:,0]}\n",
    "kaggle_out = pd.DataFrame(data=d)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "kaggle_out.to_csv(data_path+'submission.csv', header=['TRIP_ID','LATITUDE', 'LONGITUDE'], index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def hdist(a, b):\n",
    "    deg2rad = 3.141592653589793 / 180\n",
    "\n",
    "    lat1 = a[:, 1] * deg2rad\n",
    "    lon1 = a[:, 0] * deg2rad\n",
    "    lat2 = b[:, 1] * deg2rad\n",
    "    lon2 = b[:, 0] * deg2rad\n",
    "\n",
    "    dlat = abs(lat1-lat2)\n",
    "    dlon = abs(lon1-lon2)\n",
    "\n",
    "    al = np.sin(dlat/2)**2  + np.cos(lat1) * np.cos(lat2) * (np.sin(dlon/2)**2)\n",
    "    d = np.arctan2(np.sqrt(al), np.sqrt(1-al))\n",
    "\n",
    "    hd = 2 * 6371 * d\n",
    "\n",
    "    return hd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "val_preds = best_model.predict(X_val_feat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-88-7606f80b50cf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtrn_preds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train_feat\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m/home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, x, batch_size, verbose)\u001b[0m\n\u001b[1;32m   1270\u001b[0m         \u001b[0mf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1271\u001b[0m         return self._predict_loop(f, ins,\n\u001b[0;32m-> 1272\u001b[0;31m                                   batch_size=batch_size, verbose=verbose)\n\u001b[0m\u001b[1;32m   1273\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1274\u001b[0m     def train_on_batch(self, x, y,\n",
      "\u001b[0;32m/home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36m_predict_loop\u001b[0;34m(self, f, ins, batch_size, verbose)\u001b[0m\n\u001b[1;32m    943\u001b[0m                 \u001b[0mins_batch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mslice_X\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mins\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_ids\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    944\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 945\u001b[0;31m             \u001b[0mbatch_outs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mins_batch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    946\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch_outs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    947\u001b[0m                 \u001b[0mbatch_outs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mbatch_outs\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/keras/backend/theano_backend.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m    957\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    958\u001b[0m         \u001b[0;32massert\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 959\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunction\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    960\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    961\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/theano/compile/function_module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    857\u001b[0m         \u001b[0mt0_fn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    858\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 859\u001b[0;31m             \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    860\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    861\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'position_of_error'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "trn_preds = model.predict(X_train_feat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "er = hdist(val_preds, X_val_target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.033741556"
      ]
     },
     "execution_count": 120,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "er.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "K.equal()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To-do: simple to extend to validation data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## Uh oh... training data not representative of test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "cuts = [\n",
    "    1376503200, # 2013-08-14 18:00\n",
    "    1380616200, # 2013-10-01 08:30\n",
    "    1381167900, # 2013-10-07 17:45\n",
    "    1383364800, # 2013-11-02 04:00\n",
    "    1387722600  # 2013-12-22 14:30\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.any([train['TIMESTAMP'].map(lambda x: x in cuts)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0          1372636858\n",
       "1          1372637303\n",
       "2          1372636951\n",
       "3          1372636854\n",
       "4          1372637091\n",
       "5          1372636965\n",
       "6          1372637210\n",
       "7          1372637299\n",
       "8          1372637274\n",
       "9          1372637905\n",
       "10         1372636875\n",
       "11         1372637984\n",
       "12         1372637343\n",
       "13         1372638595\n",
       "14         1372638151\n",
       "15         1372637610\n",
       "16         1372638481\n",
       "17         1372639135\n",
       "18         1372637482\n",
       "19         1372639181\n",
       "20         1372638161\n",
       "21         1372637254\n",
       "22         1372638502\n",
       "23         1372639960\n",
       "24         1372637658\n",
       "25         1372639092\n",
       "26         1372639535\n",
       "27         1372640499\n",
       "28         1372639635\n",
       "29         1372640555\n",
       "              ...    \n",
       "1710640    1404151621\n",
       "1710641    1404152121\n",
       "1710642    1404170192\n",
       "1710643    1386603894\n",
       "1710644    1401596832\n",
       "1710645    1404151410\n",
       "1710646    1404172198\n",
       "1710647    1404155241\n",
       "1710648    1404171548\n",
       "1710649    1404151498\n",
       "1710650    1404168899\n",
       "1710651    1404153627\n",
       "1710652    1401475142\n",
       "1710653    1403935197\n",
       "1710654    1404166892\n",
       "1710655    1404143157\n",
       "1710656    1404014448\n",
       "1710657    1380123541\n",
       "1710658    1373986578\n",
       "1710659    1403941536\n",
       "1710660    1384165182\n",
       "1710661    1404164723\n",
       "1710662    1404155105\n",
       "1710663    1388660427\n",
       "1710664    1390403767\n",
       "1710665    1404171463\n",
       "1710666    1404171367\n",
       "1710667    1388745716\n",
       "1710668    1404141826\n",
       "1710669    1404157147\n",
       "Name: TIMESTAMP, dtype: int64"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train['TIMESTAMP']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.any(train['TIMESTAMP']==1381167900)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "times = train['TIMESTAMP'].as_matrix()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',\n",
       "       'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE',\n",
       "       'LONGITUDE', 'TARGET', 'COORD_FEATURES', 'DAY_OF_WEEK', 'QUARTER_HOUR',\n",
       "       'WEEK_OF_YEAR'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1372636858, 1372637303, 1372636951, ..., 1388745716, 1404141826, 1404157147])"
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "times"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "\n",
    "count = 0\n",
    "for index, row in X_val.iterrows():\n",
    "    for ts in cuts:\n",
    "        time = row['TIMESTAMP']\n",
    "        latitude = row['LATITUDE']\n",
    "        if time <= ts and time + 15 * (len(latitude) - 1) >= ts:\n",
    "            count += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "one = count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "304"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "count + one"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "import h5py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "h = h5py.File(data_path+'original/data.hdf5', 'r')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "'Unable to open object (Component not found)'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-15-4bc7106cf938>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mevrData\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mh\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'/Configure:0000/Run:0000/CalibCycle:0000/EvrData::DataV3/NoDetector.0:Evr.0/data'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32mh5py/_objects.pyx\u001b[0m in \u001b[0;36mh5py._objects.with_phil.wrapper (/home/ilan/minonda/conda-bld/h5py_1482475225177/work/h5py/_objects.c:2856)\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mh5py/_objects.pyx\u001b[0m in \u001b[0;36mh5py._objects.with_phil.wrapper (/home/ilan/minonda/conda-bld/h5py_1482475225177/work/h5py/_objects.c:2814)\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32m/home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/h5py/_hl/group.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m    164\u001b[0m                 \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Invalid HDF5 object reference\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    165\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 166\u001b[0;31m             \u001b[0moid\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mh5o\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mid\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_e\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlapl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lapl\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    167\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    168\u001b[0m         \u001b[0motype\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mh5i\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32mh5py/_objects.pyx\u001b[0m in \u001b[0;36mh5py._objects.with_phil.wrapper (/home/ilan/minonda/conda-bld/h5py_1482475225177/work/h5py/_objects.c:2856)\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mh5py/_objects.pyx\u001b[0m in \u001b[0;36mh5py._objects.with_phil.wrapper (/home/ilan/minonda/conda-bld/h5py_1482475225177/work/h5py/_objects.c:2814)\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mh5py/h5o.pyx\u001b[0m in \u001b[0;36mh5py.h5o.open (/home/ilan/minonda/conda-bld/h5py_1482475225177/work/h5py/h5o.c:3742)\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;31mKeyError\u001b[0m: 'Unable to open object (Component not found)'"
     ]
    }
   ],
   "source": [
    "evrData=h['/Configure:0000/Run:0000/CalibCycle:0000/EvrData::DataV3/NoDetector.0:Evr.0/data']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "ename": "OSError",
     "evalue": "Failed to interpret file '/data/bckenstler/data/taxi/original/arrival-clusters.pkl' as a pickle",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mUnicodeDecodeError\u001b[0m                        Traceback (most recent call last)",
      "\u001b[0;32m/home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/numpy/lib/npyio.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(file, mmap_mode, allow_pickle, fix_imports, encoding)\u001b[0m\n\u001b[1;32m    412\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 413\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfid\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mpickle_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    414\u001b[0m             \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mUnicodeDecodeError\u001b[0m: 'ascii' codec can't decode byte 0xf7 in position 0: ordinal not in range(128)",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[0;31mOSError\u001b[0m                                   Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-13-2213758ffef0>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_path\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m'original/arrival-clusters.pkl'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m/home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/numpy/lib/npyio.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(file, mmap_mode, allow_pickle, fix_imports, encoding)\u001b[0m\n\u001b[1;32m    414\u001b[0m             \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    415\u001b[0m                 raise IOError(\n\u001b[0;32m--> 416\u001b[0;31m                     \"Failed to interpret file %s as a pickle\" % repr(file))\n\u001b[0m\u001b[1;32m    417\u001b[0m     \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    418\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mown_fid\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mOSError\u001b[0m: Failed to interpret file '/data/bckenstler/data/taxi/original/arrival-clusters.pkl' as a pickle"
     ]
    }
   ],
   "source": [
    "c = np.load(data_path+'original/arrival-clusters.pkl')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "### hd5f files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "from fuel.utils import find_in_data_path\n",
    "from fuel.datasets import H5PYDataset\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "original_path = '/data/bckenstler/data/taxi/original/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "train_set = H5PYDataset(original_path+'data.hdf5', which_sets=('train',),load_in_memory=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "valid_set = H5PYDataset(original_path+'valid.hdf5', which_sets=('cuts/test_times_0',),load_in_memory=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1710670\n"
     ]
    }
   ],
   "source": [
    "print(train_set.num_examples)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "304\n"
     ]
    }
   ],
   "source": [
    "print(valid_set.num_examples)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "data = train_set.data_sources"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2, 1, 2, ..., 2, 1, 1], dtype=int8)"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "valid_data = valid_set.data_sources"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 41.1542,  41.1542,  41.154 ,  41.1539,  41.1542,  41.1544,  41.1542,  41.1538,  41.1533,\n",
       "        41.1528,  41.1525,  41.1525,  41.1527,  41.1527,  41.1527,  41.1526,  41.1524,  41.1526,\n",
       "        41.1526,  41.1522,  41.1508,  41.1507,  41.1497,  41.1489,  41.1489,  41.1486,  41.1479,\n",
       "        41.1475,  41.1468,  41.1461,  41.1463,  41.1464,  41.146 ,  41.1449,  41.1451,  41.1454,\n",
       "        41.1458,  41.1459,  41.1458,  41.1459,  41.146 ,  41.146 ], dtype=float32)"
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "valid_data[4][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "stamps = valid_data[-3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1376502576"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stamps[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n"
     ]
    }
   ],
   "source": [
    "for i in range(0,304):    \n",
    "    print(np.any([t==int(stamps[i]) for t in X_val['TIMESTAMP']]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "int"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(X_train['TIMESTAMP'][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "numpy.int32"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(stamps[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "check = [s in stamps for s in X_val['TIMESTAMP']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2013-08-14 10:07:32\n",
      "2013-08-14 10:14:21\n",
      "2013-08-14 10:28:47\n",
      "2013-08-14 10:36:23\n",
      "2013-08-14 10:25:13\n",
      "2013-08-14 10:31:23\n",
      "2013-08-14 10:14:21\n",
      "2013-08-14 10:14:13\n",
      "2013-08-14 10:03:40\n",
      "2013-08-14 11:06:08\n",
      "2013-08-14 11:00:40\n",
      "2013-08-14 11:18:32\n",
      "2013-08-14 10:51:01\n",
      "2013-08-14 10:15:37\n",
      "2013-08-14 10:42:00\n",
      "2013-08-14 09:15:51\n",
      "2013-08-14 10:35:23\n",
      "2013-08-14 11:05:51\n",
      "2013-08-14 11:16:11\n",
      "2013-08-14 11:47:27\n",
      "2013-08-14 11:35:11\n",
      "2013-08-14 11:43:53\n",
      "2013-08-14 12:01:14\n",
      "2013-08-14 11:09:23\n",
      "2013-08-14 10:26:21\n",
      "2013-08-14 11:22:43\n",
      "2013-08-14 12:07:18\n",
      "2013-08-14 10:29:38\n",
      "2013-08-14 11:57:18\n",
      "2013-08-14 11:23:06\n",
      "2013-08-14 12:15:02\n",
      "2013-08-14 11:06:17\n",
      "2013-08-14 12:33:55\n",
      "2013-08-13 22:42:40\n",
      "2013-08-14 12:07:26\n",
      "2013-08-14 09:02:36\n",
      "2013-08-14 13:08:03\n",
      "2013-08-14 07:25:36\n",
      "2013-08-14 13:37:10\n",
      "2013-08-14 13:52:50\n",
      "2013-08-14 14:24:04\n",
      "2013-08-14 15:15:05\n",
      "2013-08-14 15:41:34\n",
      "2013-08-14 19:15:39\n",
      "2013-08-14 20:28:13\n",
      "2013-08-14 19:58:07\n",
      "2013-08-14 21:43:57\n",
      "2013-08-14 21:41:07\n",
      "2013-08-14 22:46:27\n",
      "2013-08-14 23:11:28\n",
      "2013-08-15 00:01:02\n",
      "2013-08-15 01:40:11\n",
      "2013-08-15 01:31:05\n",
      "2013-08-15 04:04:21\n",
      "2013-08-29 01:54:35\n",
      "2013-09-30 07:58:58\n",
      "2013-10-01 00:57:30\n",
      "2013-10-01 01:14:21\n",
      "2013-10-01 01:07:59\n",
      "2013-10-01 01:12:46\n",
      "2013-10-01 01:15:23\n",
      "2013-10-01 00:56:55\n",
      "2013-10-01 01:34:44\n",
      "2013-09-30 10:05:15\n",
      "2013-10-01 01:13:14\n",
      "2013-10-01 00:50:05\n",
      "2013-10-01 01:12:57\n",
      "2013-10-01 01:34:34\n",
      "2013-09-30 08:35:34\n",
      "2013-10-01 01:39:09\n",
      "2013-10-01 00:58:29\n",
      "2013-10-01 00:53:42\n",
      "2013-09-30 08:50:42\n",
      "2013-10-01 00:59:11\n",
      "2013-10-01 01:46:21\n",
      "2013-10-01 00:57:02\n",
      "2013-10-01 01:30:39\n",
      "2013-10-01 00:40:31\n",
      "2013-10-01 01:49:07\n",
      "2013-10-01 01:52:21\n",
      "2013-10-01 00:43:41\n",
      "2013-10-01 02:06:20\n",
      "2013-10-01 01:54:00\n",
      "2013-10-01 01:13:36\n",
      "2013-10-01 00:55:21\n",
      "2013-10-01 02:00:42\n",
      "2013-09-30 16:02:13\n",
      "2013-10-01 01:55:31\n",
      "2013-10-01 01:20:36\n",
      "2013-09-30 14:18:09\n",
      "2013-10-01 02:12:26\n",
      "2013-10-01 01:46:34\n",
      "2013-10-01 01:58:42\n",
      "2013-10-01 01:59:55\n",
      "2013-10-01 01:48:49\n",
      "2013-10-01 01:50:59\n",
      "2013-10-01 00:33:27\n",
      "2013-09-30 13:02:58\n",
      "2013-10-01 01:20:56\n",
      "2013-10-01 02:05:00\n",
      "2013-10-01 01:42:57\n",
      "2013-10-01 01:37:42\n",
      "2013-10-01 01:51:28\n",
      "2013-10-01 01:40:01\n",
      "2013-10-01 01:53:52\n",
      "2013-10-01 02:18:16\n",
      "2013-10-01 02:20:50\n",
      "2013-10-01 02:22:04\n",
      "2013-10-01 01:38:33\n",
      "2013-10-01 01:53:27\n",
      "2013-10-01 01:59:50\n",
      "2013-10-01 00:59:27\n",
      "2013-10-01 01:53:45\n",
      "2013-10-01 02:11:18\n",
      "2013-10-01 01:51:55\n",
      "2013-10-01 01:46:14\n",
      "2013-10-01 01:49:47\n",
      "2013-10-01 02:17:16\n",
      "2013-10-01 01:57:39\n",
      "2013-10-01 02:09:57\n",
      "2013-10-01 02:36:04\n",
      "2013-10-01 01:51:49\n",
      "2013-10-01 02:10:14\n",
      "2013-10-01 02:15:34\n",
      "2013-10-01 02:03:47\n",
      "2013-10-01 02:01:06\n",
      "2013-10-01 02:02:54\n",
      "2013-10-01 02:39:46\n",
      "2013-09-30 14:47:45\n",
      "2013-10-01 02:34:19\n",
      "2013-10-01 01:55:35\n",
      "2013-10-01 02:04:15\n",
      "2013-10-01 02:25:37\n",
      "2013-10-01 02:53:51\n",
      "2013-10-01 02:21:52\n",
      "2013-10-01 02:17:23\n",
      "2013-10-01 02:52:09\n",
      "2013-10-01 03:10:34\n",
      "2013-10-01 02:50:11\n",
      "2013-10-01 02:17:02\n",
      "2013-10-01 02:51:34\n",
      "2013-10-01 02:47:29\n",
      "2013-10-01 02:47:58\n",
      "2013-10-01 02:48:11\n",
      "2013-10-01 02:44:48\n",
      "2013-10-01 02:55:34\n",
      "2013-10-01 03:06:12\n",
      "2013-10-01 04:22:22\n",
      "2013-10-01 03:55:25\n",
      "2013-10-01 09:55:50\n",
      "2013-10-07 09:39:25\n",
      "2013-10-07 10:22:21\n",
      "2013-10-07 04:17:58\n",
      "2013-10-07 10:25:18\n",
      "2013-10-07 07:28:48\n",
      "2013-10-07 09:53:31\n",
      "2013-10-07 10:28:40\n",
      "2013-10-07 09:43:36\n",
      "2013-10-07 11:33:33\n",
      "2013-10-07 09:47:13\n",
      "2013-10-07 10:45:36\n",
      "2013-10-07 11:36:41\n",
      "2013-10-07 12:02:04\n",
      "2013-10-07 11:37:48\n",
      "2013-10-07 11:52:38\n",
      "2013-10-07 12:06:22\n",
      "2013-10-07 11:34:34\n",
      "2013-10-07 10:18:22\n",
      "2013-10-07 11:31:49\n",
      "2013-10-07 11:54:39\n",
      "2013-10-07 11:15:50\n",
      "2013-10-07 11:25:14\n",
      "2013-10-07 12:22:42\n",
      "2013-10-07 11:58:31\n",
      "2013-10-07 11:56:48\n",
      "2013-10-07 11:58:08\n",
      "2013-10-07 11:59:03\n",
      "2013-10-07 06:53:29\n",
      "2013-10-07 08:41:29\n",
      "2013-10-07 12:23:19\n",
      "2013-10-07 12:13:27\n",
      "2013-10-07 12:52:41\n",
      "2013-10-07 10:52:23\n",
      "2013-10-07 11:12:36\n",
      "2013-10-07 12:53:53\n",
      "2013-10-07 12:45:15\n",
      "2013-10-07 12:54:38\n",
      "2013-10-07 10:46:32\n",
      "2013-10-07 11:54:15\n",
      "2013-10-07 11:52:09\n",
      "2013-10-07 12:01:28\n",
      "2013-10-07 11:35:00\n",
      "2013-10-07 12:24:21\n",
      "2013-10-07 13:07:04\n",
      "2013-10-07 13:40:22\n",
      "2013-10-07 13:47:05\n",
      "2013-10-07 10:10:45\n",
      "2013-10-07 13:28:27\n",
      "2013-10-07 12:35:05\n",
      "2013-10-07 13:09:15\n",
      "2013-10-07 11:44:18\n",
      "2013-10-07 14:42:34\n",
      "2013-10-07 13:24:59\n",
      "2013-10-07 13:11:00\n",
      "2013-10-07 14:10:43\n",
      "2013-10-07 15:09:55\n",
      "2013-10-07 22:16:07\n",
      "2013-10-07 21:46:40\n",
      "2013-10-07 23:43:29\n",
      "2013-10-07 09:15:06\n",
      "2013-10-07 19:40:37\n",
      "2013-10-08 00:10:51\n",
      "2013-10-07 12:39:02\n",
      "2013-10-07 13:55:44\n",
      "2013-10-08 00:31:15\n",
      "2013-10-07 23:57:18\n",
      "2013-10-08 01:08:20\n",
      "2013-10-08 04:09:15\n",
      "2013-11-01 21:32:47\n",
      "2013-11-01 21:14:53\n",
      "2013-11-01 21:39:50\n",
      "2013-11-01 21:30:52\n",
      "2013-11-01 21:20:27\n",
      "2013-11-01 21:09:21\n",
      "2013-11-01 21:22:48\n",
      "2013-11-01 21:38:38\n",
      "2013-11-01 21:05:58\n",
      "2013-11-01 21:38:29\n",
      "2013-11-01 20:24:41\n",
      "2013-11-01 21:45:04\n",
      "2013-11-01 21:32:47\n",
      "2013-11-01 21:06:05\n",
      "2013-11-01 21:32:46\n",
      "2013-11-01 21:40:51\n",
      "2013-11-01 21:37:10\n",
      "2013-11-01 20:36:02\n",
      "2013-11-01 21:45:05\n",
      "2013-11-01 21:33:28\n",
      "2013-11-01 21:49:08\n",
      "2013-11-01 21:37:25\n",
      "2013-11-01 21:51:12\n",
      "2013-11-01 21:13:05\n",
      "2013-11-01 21:33:50\n",
      "2013-11-01 21:35:31\n",
      "2013-11-01 21:46:46\n",
      "2013-11-01 21:37:35\n",
      "2013-11-01 21:42:36\n",
      "2013-11-01 21:53:26\n",
      "2013-11-01 22:01:40\n",
      "2013-11-01 21:38:20\n",
      "2013-11-01 21:36:27\n",
      "2013-11-01 22:05:16\n",
      "2013-11-01 21:59:10\n",
      "2013-11-01 18:00:02\n",
      "2013-11-01 22:09:29\n",
      "2013-11-01 21:58:45\n",
      "2013-11-01 22:16:30\n",
      "2013-11-01 21:06:47\n",
      "2013-11-01 22:21:46\n",
      "2013-11-01 22:12:47\n",
      "2013-11-01 22:10:46\n",
      "2013-11-01 22:20:50\n",
      "2013-11-01 21:52:14\n",
      "2013-11-01 22:12:02\n",
      "2013-11-01 22:12:30\n",
      "2013-11-01 22:59:32\n",
      "2013-11-01 22:11:17\n",
      "2013-11-01 23:35:01\n",
      "2013-11-01 23:27:56\n",
      "2013-11-02 09:37:04\n",
      "2013-12-22 06:39:00\n",
      "2013-12-22 06:39:18\n",
      "2013-12-22 06:56:09\n",
      "2013-12-22 07:57:34\n",
      "2013-12-22 07:19:53\n",
      "2013-12-22 07:33:46\n",
      "2013-12-22 08:01:08\n",
      "2013-12-22 08:01:17\n",
      "2013-12-22 08:29:30\n",
      "2013-12-22 08:01:29\n",
      "2013-12-22 07:45:23\n",
      "2013-12-22 08:08:20\n",
      "2013-12-22 08:30:08\n",
      "2013-12-21 13:07:37\n",
      "2013-12-22 07:51:17\n",
      "2013-12-22 07:11:40\n",
      "2013-12-22 08:57:33\n",
      "2013-12-22 08:49:51\n",
      "2013-12-22 06:49:38\n",
      "2013-12-22 09:00:47\n",
      "2013-12-22 09:36:42\n",
      "2013-12-22 09:02:56\n",
      "2013-12-22 08:21:05\n",
      "2013-12-22 10:05:26\n",
      "2013-12-22 04:01:53\n",
      "2013-12-22 10:02:21\n",
      "2013-12-22 08:54:18\n",
      "2013-12-22 10:31:35\n",
      "2013-12-22 10:37:30\n",
      "2013-12-22 11:28:57\n",
      "2013-12-22 11:56:01\n",
      "2013-12-22 15:40:59\n",
      "2013-12-22 10:02:07\n",
      "2013-12-23 00:48:48\n"
     ]
    }
   ],
   "source": [
    "for s in X_val['TIMESTAMP']:\n",
    "    print(datetime.datetime.fromtimestamp(s))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2013-08-14 10:49:36\n",
      "2013-08-14 10:59:06\n",
      "2013-08-14 10:55:42\n",
      "2013-08-14 10:50:04\n",
      "2013-08-14 10:50:11\n",
      "2013-08-14 10:56:57\n",
      "2013-08-14 10:36:51\n",
      "2013-08-14 10:44:15\n",
      "2013-08-14 10:55:50\n",
      "2013-08-14 10:50:35\n",
      "2013-08-14 10:50:27\n",
      "2013-08-14 10:43:57\n",
      "2013-08-14 10:16:48\n",
      "2013-08-14 10:40:47\n",
      "2013-08-14 10:45:55\n",
      "2013-08-14 10:43:00\n",
      "2013-08-14 10:53:22\n",
      "2013-08-14 10:50:03\n",
      "2013-08-14 10:26:22\n",
      "2013-08-14 10:59:15\n",
      "2013-08-14 10:50:17\n",
      "2013-08-14 10:56:34\n",
      "2013-08-14 10:53:42\n",
      "2013-08-14 10:47:46\n",
      "2013-08-14 10:58:46\n",
      "2013-08-14 10:24:23\n",
      "2013-08-14 10:55:19\n",
      "2013-08-14 10:57:03\n",
      "2013-08-14 10:56:11\n",
      "2013-08-14 10:56:52\n",
      "2013-08-14 10:57:57\n",
      "2013-08-14 10:08:15\n",
      "2013-08-14 10:51:14\n",
      "2013-08-14 10:58:31\n",
      "2013-08-14 10:47:31\n",
      "2013-08-14 10:30:36\n",
      "2013-08-14 10:17:59\n",
      "2013-08-14 10:48:03\n",
      "2013-08-14 10:55:52\n",
      "2013-08-14 10:49:06\n",
      "2013-08-14 10:58:55\n",
      "2013-08-14 10:51:24\n",
      "2013-08-14 10:54:12\n",
      "2013-08-14 10:54:26\n",
      "2013-08-14 10:51:18\n",
      "2013-08-14 10:59:56\n",
      "2013-08-14 10:48:31\n",
      "2013-08-14 10:51:56\n",
      "2013-08-14 10:39:22\n",
      "2013-08-14 10:57:25\n",
      "2013-08-14 10:57:28\n",
      "2013-08-14 10:57:40\n",
      "2013-08-14 10:39:01\n",
      "2013-08-14 10:50:39\n",
      "2013-08-14 09:48:19\n",
      "2013-10-01 01:16:12\n",
      "2013-10-01 01:28:04\n",
      "2013-10-01 01:18:37\n",
      "2013-10-01 01:24:48\n",
      "2013-10-01 01:23:39\n",
      "2013-10-01 01:28:37\n",
      "2013-10-01 01:20:16\n",
      "2013-10-01 01:23:49\n",
      "2013-10-01 01:27:11\n",
      "2013-10-01 01:06:20\n",
      "2013-10-01 01:28:08\n",
      "2013-10-01 01:29:02\n",
      "2013-10-01 01:24:44\n",
      "2013-10-01 01:24:44\n",
      "2013-10-01 01:19:06\n",
      "2013-10-01 00:28:33\n",
      "2013-10-01 01:29:28\n",
      "2013-10-01 01:27:31\n",
      "2013-10-01 01:22:13\n",
      "2013-10-01 01:26:03\n",
      "2013-10-01 01:28:55\n",
      "2013-10-01 01:18:10\n",
      "2013-10-01 01:22:13\n",
      "2013-10-01 01:14:30\n",
      "2013-10-01 01:24:41\n",
      "2013-10-01 01:22:16\n",
      "2013-10-01 01:25:35\n",
      "2013-10-01 01:21:27\n",
      "2013-10-01 01:11:33\n",
      "2013-10-01 01:10:18\n",
      "2013-10-01 01:09:33\n",
      "2013-10-01 01:01:15\n",
      "2013-10-01 01:17:58\n",
      "2013-10-01 01:18:00\n",
      "2013-10-01 01:13:26\n",
      "2013-10-01 01:18:01\n",
      "2013-10-01 01:25:54\n",
      "2013-10-01 01:21:20\n",
      "2013-10-01 01:25:31\n",
      "2013-10-01 01:25:54\n",
      "2013-10-01 01:23:40\n",
      "2013-10-01 01:26:46\n",
      "2013-10-01 01:23:31\n",
      "2013-10-01 01:17:09\n",
      "2013-10-01 01:21:57\n",
      "2013-10-01 00:29:09\n",
      "2013-10-01 01:14:47\n",
      "2013-10-01 01:04:25\n",
      "2013-10-01 01:14:09\n",
      "2013-10-01 01:16:59\n",
      "2013-10-01 01:27:16\n",
      "2013-10-01 01:16:26\n",
      "2013-10-01 01:23:18\n",
      "2013-10-01 01:16:05\n",
      "2013-10-01 01:27:43\n",
      "2013-10-01 01:08:13\n",
      "2013-10-01 01:19:21\n",
      "2013-10-01 01:21:19\n",
      "2013-10-01 01:24:20\n",
      "2013-10-01 01:26:45\n",
      "2013-10-01 01:18:28\n",
      "2013-10-01 01:19:45\n",
      "2013-10-01 01:28:10\n",
      "2013-10-01 01:22:20\n",
      "2013-10-01 01:18:42\n",
      "2013-10-01 01:19:52\n",
      "2013-10-01 01:18:44\n",
      "2013-10-01 01:15:11\n",
      "2013-10-01 01:19:24\n",
      "2013-10-01 01:23:58\n",
      "2013-10-01 01:28:50\n",
      "2013-10-01 01:13:24\n",
      "2013-10-01 01:28:38\n",
      "2013-10-01 01:24:50\n",
      "2013-10-01 01:14:19\n",
      "2013-10-01 01:10:05\n",
      "2013-10-01 01:26:31\n",
      "2013-10-01 01:28:01\n",
      "2013-09-30 23:44:16\n",
      "2013-10-01 01:21:43\n",
      "2013-10-01 01:26:57\n",
      "2013-10-01 01:25:25\n",
      "2013-10-01 01:25:36\n",
      "2013-10-01 01:16:34\n",
      "2013-10-01 01:26:40\n",
      "2013-10-01 01:14:56\n",
      "2013-10-01 01:13:10\n",
      "2013-10-01 01:28:34\n",
      "2013-10-01 01:19:08\n",
      "2013-10-01 01:24:57\n",
      "2013-10-01 00:52:43\n",
      "2013-10-01 01:25:28\n",
      "2013-10-01 01:22:54\n",
      "2013-10-01 01:28:49\n",
      "2013-10-01 00:13:25\n",
      "2013-10-07 10:34:47\n",
      "2013-10-07 10:38:08\n",
      "2013-10-07 10:31:10\n",
      "2013-10-07 10:35:12\n",
      "2013-10-07 10:41:50\n",
      "2013-10-07 10:34:31\n",
      "2013-10-07 10:42:02\n",
      "2013-10-07 10:39:05\n",
      "2013-10-07 10:31:43\n",
      "2013-10-07 10:34:27\n",
      "2013-10-07 10:31:48\n",
      "2013-10-07 10:42:24\n",
      "2013-10-07 10:38:37\n",
      "2013-10-07 10:29:02\n",
      "2013-10-07 10:33:55\n",
      "2013-10-07 10:17:07\n",
      "2013-10-07 10:44:31\n",
      "2013-10-07 10:42:52\n",
      "2013-10-07 10:26:05\n",
      "2013-10-07 10:34:07\n",
      "2013-10-07 10:40:59\n",
      "2013-10-07 10:41:36\n",
      "2013-10-07 10:33:47\n",
      "2013-10-07 10:30:59\n",
      "2013-10-07 10:38:59\n",
      "2013-10-07 10:28:56\n",
      "2013-10-07 10:41:24\n",
      "2013-10-07 10:41:49\n",
      "2013-10-07 10:42:47\n",
      "2013-10-07 10:34:09\n",
      "2013-10-07 10:40:31\n",
      "2013-10-07 10:21:34\n",
      "2013-10-07 10:43:52\n",
      "2013-10-07 10:18:11\n",
      "2013-10-07 10:41:47\n",
      "2013-10-07 10:33:04\n",
      "2013-10-07 10:40:53\n",
      "2013-10-07 10:36:38\n",
      "2013-10-07 10:41:46\n",
      "2013-10-07 10:03:36\n",
      "2013-10-07 10:44:45\n",
      "2013-10-07 10:21:42\n",
      "2013-10-07 10:24:07\n",
      "2013-10-07 10:40:35\n",
      "2013-10-07 10:41:00\n",
      "2013-10-07 10:43:10\n",
      "2013-10-07 10:23:55\n",
      "2013-10-07 10:43:30\n",
      "2013-10-07 10:25:24\n",
      "2013-10-07 10:35:07\n",
      "2013-10-07 10:43:33\n",
      "2013-10-07 10:39:30\n",
      "2013-10-07 10:31:42\n",
      "2013-10-07 10:39:17\n",
      "2013-10-07 10:42:47\n",
      "2013-10-07 10:39:20\n",
      "2013-10-07 10:44:41\n",
      "2013-10-07 10:24:22\n",
      "2013-10-07 10:12:39\n",
      "2013-10-07 10:37:25\n",
      "2013-10-07 10:42:55\n",
      "2013-10-07 10:14:35\n",
      "2013-10-07 10:37:12\n",
      "2013-10-07 10:32:29\n",
      "2013-10-07 10:42:37\n",
      "2013-10-07 10:26:52\n",
      "2013-10-07 10:31:19\n",
      "2013-10-07 10:44:58\n",
      "2013-11-01 20:47:37\n",
      "2013-11-01 20:54:00\n",
      "2013-11-01 20:58:53\n",
      "2013-11-01 20:56:37\n",
      "2013-11-01 20:56:09\n",
      "2013-11-01 20:51:05\n",
      "2013-11-01 20:50:58\n",
      "2013-11-01 20:55:26\n",
      "2013-11-01 20:53:43\n",
      "2013-11-01 20:53:46\n",
      "2013-11-01 20:54:55\n",
      "2013-11-01 20:59:28\n",
      "2013-11-01 20:56:54\n",
      "2013-11-01 20:50:37\n",
      "2013-11-01 20:48:40\n",
      "2013-11-01 20:55:46\n",
      "2013-11-01 20:45:20\n",
      "2013-11-01 20:46:22\n",
      "2013-11-01 20:48:25\n",
      "2013-11-01 20:47:19\n",
      "2013-11-01 20:57:31\n",
      "2013-11-01 20:58:14\n",
      "2013-11-01 20:49:30\n",
      "2013-11-01 20:43:31\n",
      "2013-11-01 20:59:00\n",
      "2013-11-01 20:54:23\n",
      "2013-11-01 20:51:01\n",
      "2013-11-01 20:38:12\n",
      "2013-11-01 20:59:31\n",
      "2013-11-01 20:56:46\n",
      "2013-11-01 20:53:51\n",
      "2013-11-01 20:48:00\n",
      "2013-11-01 20:58:04\n",
      "2013-11-01 20:52:50\n",
      "2013-11-01 20:58:12\n",
      "2013-11-01 20:57:37\n",
      "2013-11-01 20:53:33\n",
      "2013-11-01 20:54:11\n",
      "2013-11-01 20:48:49\n",
      "2013-11-01 20:42:56\n",
      "2013-11-01 20:55:36\n",
      "2013-11-01 20:51:36\n",
      "2013-11-01 20:48:45\n",
      "2013-11-01 20:49:17\n",
      "2013-11-01 20:53:50\n",
      "2013-11-01 20:45:28\n",
      "2013-11-01 20:45:04\n",
      "2013-11-01 20:52:17\n",
      "2013-11-01 20:52:10\n",
      "2013-11-01 20:59:16\n",
      "2013-11-01 20:51:37\n",
      "2013-11-01 20:50:10\n",
      "2013-12-22 06:24:50\n",
      "2013-12-22 06:04:12\n",
      "2013-12-22 06:16:27\n",
      "2013-12-22 06:23:06\n",
      "2013-12-22 06:24:04\n",
      "2013-12-22 06:17:33\n",
      "2013-12-22 06:22:55\n",
      "2013-12-22 06:24:35\n",
      "2013-12-22 06:21:56\n",
      "2013-12-22 06:22:49\n",
      "2013-12-22 06:25:31\n",
      "2013-12-22 06:21:31\n",
      "2013-12-22 06:27:31\n",
      "2013-12-22 06:29:45\n",
      "2013-12-22 06:26:09\n",
      "2013-12-22 06:17:08\n",
      "2013-12-22 06:26:00\n",
      "2013-12-22 06:20:56\n",
      "2013-12-22 06:23:09\n",
      "2013-12-22 06:22:31\n",
      "2013-12-22 06:29:59\n",
      "2013-12-22 06:27:43\n",
      "2013-12-22 06:23:04\n",
      "2013-12-22 06:25:30\n",
      "2013-12-22 06:19:16\n",
      "2013-12-22 06:23:06\n",
      "2013-12-22 06:26:01\n",
      "2013-12-22 06:19:45\n",
      "2013-12-22 02:34:23\n",
      "2013-12-22 06:29:54\n",
      "2013-12-22 06:28:39\n",
      "2013-12-22 06:27:43\n",
      "2013-12-22 06:16:23\n",
      "2013-12-22 06:17:26\n"
     ]
    }
   ],
   "source": [
    "for s in stamps:\n",
    "    print(datetime.datetime.fromtimestamp(s))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "ids = valid_data[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "numpy.bytes_"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(ids[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[\"b'1376502576620000126'\",\n",
       " \"b'1376503146620000161'\",\n",
       " \"b'1376502942620000500'\",\n",
       " \"b'1376502604620000105'\",\n",
       " \"b'1376502611620000022'\",\n",
       " \"b'1376503017620000272'\",\n",
       " \"b'1376501811620000617'\",\n",
       " \"b'1376502255620000663'\",\n",
       " \"b'1376502950620000005'\",\n",
       " \"b'1376502635620000276'\",\n",
       " \"b'1376502627620000596'\",\n",
       " \"b'1376502237620000675'\",\n",
       " \"b'1376500608620000409'\",\n",
       " \"b'1376502047620000574'\",\n",
       " \"b'1376502355620000338'\",\n",
       " \"b'1376502180620000080'\",\n",
       " \"b'1376502802620000680'\",\n",
       " \"b'1376502603620000142'\",\n",
       " \"b'1376501182620000651'\",\n",
       " \"b'1376503155620000026'\",\n",
       " \"b'1376502617620000657'\",\n",
       " \"b'1376502994620000604'\",\n",
       " \"b'1376502822620000093'\",\n",
       " \"b'1376502466620000561'\",\n",
       " \"b'1376503126620000410'\",\n",
       " \"b'1376501063620000343'\",\n",
       " \"b'1376502919620000166'\",\n",
       " \"b'1376503023620000010'\",\n",
       " \"b'1376502971620000517'\",\n",
       " \"b'1376503012620000273'\",\n",
       " \"b'1376503077620000470'\",\n",
       " \"b'1376500095620000569'\",\n",
       " \"b'1376502674620000426'\",\n",
       " \"b'1376503111620000674'\",\n",
       " \"b'1376502451620000310'\",\n",
       " \"b'1376501436620000344'\",\n",
       " \"b'1376500679620000108'\",\n",
       " \"b'1376502483620000356'\",\n",
       " \"b'1376502952620000687'\",\n",
       " \"b'1376502546620000254'\",\n",
       " \"b'1376503135620000053'\",\n",
       " \"b'1376502684620000503'\",\n",
       " \"b'1376502852620000321'\",\n",
       " \"b'1376502866620000421'\",\n",
       " \"b'1376502678620000460'\",\n",
       " \"b'1376503196620000386'\",\n",
       " \"b'1376502511620000480'\",\n",
       " \"b'1376502716620000224'\",\n",
       " \"b'1376501962620000507'\",\n",
       " \"b'1376503045620000633'\",\n",
       " \"b'1376503048620000349'\",\n",
       " \"b'1376503060620000049'\",\n",
       " \"b'1376501941620000667'\",\n",
       " \"b'1376502639620000281'\",\n",
       " \"b'1376498899620000172'\",\n",
       " \"b'1380615372620000303'\",\n",
       " \"b'1380616084620000260'\",\n",
       " \"b'1380615517620000372'\",\n",
       " \"b'1380615888620000588'\",\n",
       " \"b'1380615819620000042'\",\n",
       " \"b'1380616117620000325'\",\n",
       " \"b'1380615616620000040'\",\n",
       " \"b'1380615829620000682'\",\n",
       " \"b'1380616031620000001'\",\n",
       " \"b'1380614780620000352'\",\n",
       " \"b'1380616088620000513'\",\n",
       " \"b'1380616142620000289'\",\n",
       " \"b'1380615884620000166'\",\n",
       " \"b'1380615884620000671'\",\n",
       " \"b'1380615546620000187'\",\n",
       " \"b'1380612513620000172'\",\n",
       " \"b'1380616168620000472'\",\n",
       " \"b'1380616051620000597'\",\n",
       " \"b'1380615733620000105'\",\n",
       " \"b'1380615963620000137'\",\n",
       " \"b'1380616135620000672'\",\n",
       " \"b'1380615490620000574'\",\n",
       " \"b'1380615733620000051'\",\n",
       " \"b'1380615270620000612'\",\n",
       " \"b'1380615881620000031'\",\n",
       " \"b'1380615736620000246'\",\n",
       " \"b'1380615935620000367'\",\n",
       " \"b'1380615687620000577'\",\n",
       " \"b'1380615093620000272'\",\n",
       " \"b'1380615018620000632'\",\n",
       " \"b'1380614973620000258'\",\n",
       " \"b'1380614475620000032'\",\n",
       " \"b'1380615478620000138'\",\n",
       " \"b'1380615480620000381'\",\n",
       " \"b'1380615206620000397'\",\n",
       " \"b'1380615481620000077'\",\n",
       " \"b'1380615954620000546'\",\n",
       " \"b'1380615680620000192'\",\n",
       " \"b'1380615931620000068'\",\n",
       " \"b'1380615954620000395'\",\n",
       " \"b'1380615820620000482'\",\n",
       " \"b'1380616006620000080'\",\n",
       " \"b'1380615811620000431'\",\n",
       " \"b'1380615429620000602'\",\n",
       " \"b'1380615717620000497'\",\n",
       " \"b'1380612549620000161'\",\n",
       " \"b'1380615287620000675'\",\n",
       " \"b'1380614665620000458'\",\n",
       " \"b'1380615249620000222'\",\n",
       " \"b'1380615419620000487'\",\n",
       " \"b'1380616036620000669'\",\n",
       " \"b'1380615386620000476'\",\n",
       " \"b'1380615798620000523'\",\n",
       " \"b'1380615365620000215'\",\n",
       " \"b'1380616063620000065'\",\n",
       " \"b'1380614893620000011'\",\n",
       " \"b'1380615561620000391'\",\n",
       " \"b'1380615679620000004'\",\n",
       " \"b'1380615860620000429'\",\n",
       " \"b'1380616005620000695'\",\n",
       " \"b'1380615508620000361'\",\n",
       " \"b'1380615585620000665'\",\n",
       " \"b'1380616090620000562'\",\n",
       " \"b'1380615740620000398'\",\n",
       " \"b'1380615522620000156'\",\n",
       " \"b'1380615592620000674'\",\n",
       " \"b'1380615524620000279'\",\n",
       " \"b'1380615311620000540'\",\n",
       " \"b'1380615564620000216'\",\n",
       " \"b'1380615838620000324'\",\n",
       " \"b'1380616130620000356'\",\n",
       " \"b'1380615204620000387'\",\n",
       " \"b'1380616118620000649'\",\n",
       " \"b'1380615890620000159'\",\n",
       " \"b'1380615259620000393'\",\n",
       " \"b'1380615005620000249'\",\n",
       " \"b'1380615991620000589'\",\n",
       " \"b'1380616081620000633'\",\n",
       " \"b'1380609856620000609'\",\n",
       " \"b'1380615703620000410'\",\n",
       " \"b'1380616017620000470'\",\n",
       " \"b'1380615925620000177'\",\n",
       " \"b'1380615936620000547'\",\n",
       " \"b'1380615394620000400'\",\n",
       " \"b'1380616000620000140'\",\n",
       " \"b'1380615296620000020'\",\n",
       " \"b'1380615190620000477'\",\n",
       " \"b'1380616114620000151'\",\n",
       " \"b'1380615548620000247'\",\n",
       " \"b'1380615897620000616'\",\n",
       " \"b'1380613963620000005'\",\n",
       " \"b'1380615928620000449'\",\n",
       " \"b'1380615774620000158'\",\n",
       " \"b'1380616129620000281'\",\n",
       " \"b'1380611605620000351'\",\n",
       " \"b'1381167287620000123'\",\n",
       " \"b'1381167488620000626'\",\n",
       " \"b'1381167070620000142'\",\n",
       " \"b'1381167312620000337'\",\n",
       " \"b'1381167710620000684'\",\n",
       " \"b'1381167271620000159'\",\n",
       " \"b'1381167722620000624'\",\n",
       " \"b'1381167545620000419'\",\n",
       " \"b'1381167103620000114'\",\n",
       " \"b'1381167267620000668'\",\n",
       " \"b'1381167108620000307'\",\n",
       " \"b'1381167744620000051'\",\n",
       " \"b'1381167517620000356'\",\n",
       " \"b'1381166942620000518'\",\n",
       " \"b'1381167235620000529'\",\n",
       " \"b'1381166227620000901'\",\n",
       " \"b'1381167871620000463'\",\n",
       " \"b'1381167772620000495'\",\n",
       " \"b'1381166765620000008'\",\n",
       " \"b'1381167247620000345'\",\n",
       " \"b'1381167659620000235'\",\n",
       " \"b'1381167696620000085'\",\n",
       " \"b'1381167227620000156'\",\n",
       " \"b'1381167059620000004'\",\n",
       " \"b'1381167539620000256'\",\n",
       " \"b'1381166936620000426'\",\n",
       " \"b'1381167684620000621'\",\n",
       " \"b'1381167709620000249'\",\n",
       " \"b'1381167767620000094'\",\n",
       " \"b'1381167249620000675'\",\n",
       " \"b'1381167631620000116'\",\n",
       " \"b'1381166494620000480'\",\n",
       " \"b'1381167832620000074'\",\n",
       " \"b'1381166291620000326'\",\n",
       " \"b'1381167707620000653'\",\n",
       " \"b'1381167184620000560'\",\n",
       " \"b'1381167653620000295'\",\n",
       " \"b'1381167398620000686'\",\n",
       " \"b'1381167706620000321'\",\n",
       " \"b'1381165416620000697'\",\n",
       " \"b'1381167885620000280'\",\n",
       " \"b'1381166502620000297'\",\n",
       " \"b'1381166647620000657'\",\n",
       " \"b'1381167635620000662'\",\n",
       " \"b'1381167660620000594'\",\n",
       " \"b'1381167790620000093'\",\n",
       " \"b'1381166635620000195'\",\n",
       " \"b'1381167810620000431'\",\n",
       " \"b'1381166724620000311'\",\n",
       " \"b'1381167307620000591'\",\n",
       " \"b'1381167813620000267'\",\n",
       " \"b'1381167570620000648'\",\n",
       " \"b'1381167102620000525'\",\n",
       " \"b'1381167557620000424'\",\n",
       " \"b'1381167767620000160'\",\n",
       " \"b'1381167560620000633'\",\n",
       " \"b'1381167881620000391'\",\n",
       " \"b'1381166662620000189'\",\n",
       " \"b'1381165959620000138'\",\n",
       " \"b'1381167445620000344'\",\n",
       " \"b'1381167775620000049'\",\n",
       " \"b'1381166075620000068'\",\n",
       " \"b'1381167432620000001'\",\n",
       " \"b'1381167149620000257'\",\n",
       " \"b'1381167757620000324'\",\n",
       " \"b'1381166812620000595'\",\n",
       " \"b'1381167079620000535'\",\n",
       " \"b'1381167898620000667'\",\n",
       " \"b'1383364057620000066'\",\n",
       " \"b'1383364440620000010'\",\n",
       " \"b'1383364733620000009'\",\n",
       " \"b'1383364597620000601'\",\n",
       " \"b'1383364569620000356'\",\n",
       " \"b'1383364265620000007'\",\n",
       " \"b'1383364258620000574'\",\n",
       " \"b'1383364526620000108'\",\n",
       " \"b'1383364423620000015'\",\n",
       " \"b'1383364426620000632'\",\n",
       " \"b'1383364495620000611'\",\n",
       " \"b'1383364768620000388'\",\n",
       " \"b'1383364614620000372'\",\n",
       " \"b'1383364237620000455'\",\n",
       " \"b'1383364120620000403'\",\n",
       " \"b'1383364546620000041'\",\n",
       " \"b'1383363920620000020'\",\n",
       " \"b'1383363982620000591'\",\n",
       " \"b'1383364105620000665'\",\n",
       " \"b'1383364039620000618'\",\n",
       " \"b'1383364651620000513'\",\n",
       " \"b'1383364694620000364'\",\n",
       " \"b'1383364170620000239'\",\n",
       " \"b'1383363811620000031'\",\n",
       " \"b'1383364740620000252'\",\n",
       " \"b'1383364463620000345'\",\n",
       " \"b'1383364261620000436'\",\n",
       " \"b'1383363492620000672'\",\n",
       " \"b'1383364771620000320'\",\n",
       " \"b'1383364606620000508'\",\n",
       " \"b'1383364431620000233'\",\n",
       " \"b'1383364080620000527'\",\n",
       " \"b'1383364684620000005'\",\n",
       " \"b'1383364370620000140'\",\n",
       " \"b'1383364692620000118'\",\n",
       " \"b'1383364657620000570'\",\n",
       " \"b'1383364413620000492'\",\n",
       " \"b'1383364451620000309'\",\n",
       " \"b'1383364129620000013'\",\n",
       " \"b'1383363776620000434'\",\n",
       " \"b'1383364536620000217'\",\n",
       " \"b'1383364296620000112'\",\n",
       " \"b'1383364125620000625'\",\n",
       " \"b'1383364157620000648'\",\n",
       " \"b'1383364430620000542'\",\n",
       " \"b'1383363928620000616'\",\n",
       " \"b'1383363904620000105'\",\n",
       " \"b'1383364337620000612'\",\n",
       " \"b'1383364330620000333'\",\n",
       " \"b'1383364756620000540'\",\n",
       " \"b'1383364297620000596'\",\n",
       " \"b'1383364210620000153'\",\n",
       " \"b'1387722290620000362'\",\n",
       " \"b'1387721052620000311'\",\n",
       " \"b'1387721787620000046'\",\n",
       " \"b'1387722186620000565'\",\n",
       " \"b'1387722244620000068'\",\n",
       " \"b'1387721853620000403'\",\n",
       " \"b'1387722175620000633'\",\n",
       " \"b'1387722275620000172'\",\n",
       " \"b'1387722116620000187'\",\n",
       " \"b'1387722169620000060'\",\n",
       " \"b'1387722331620000058'\",\n",
       " \"b'1387722091620000607'\",\n",
       " \"b'1387722451620000540'\",\n",
       " \"b'1387722585620000430'\",\n",
       " \"b'1387722369620000120'\",\n",
       " \"b'1387721828620000123'\",\n",
       " \"b'1387722360620000391'\",\n",
       " \"b'1387722056620000089'\",\n",
       " \"b'1387722189620000480'\",\n",
       " \"b'1387722151620000184'\",\n",
       " \"b'1387722599620000137'\",\n",
       " \"b'1387722463620000314'\",\n",
       " \"b'1387722184620000057'\",\n",
       " \"b'1387722330620000171'\",\n",
       " \"b'1387721956620000373'\",\n",
       " \"b'1387722186620000197'\",\n",
       " \"b'1387722361620000697'\",\n",
       " \"b'1387721985620000173'\",\n",
       " \"b'1387708463620000329'\",\n",
       " \"b'1387722594620000900'\",\n",
       " \"b'1387722519620000482'\",\n",
       " \"b'1387722463620000481'\",\n",
       " \"b'1387721783620000030'\",\n",
       " \"b'1387721846620000247'\"]"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>TRIP_ID</th>\n",
       "      <th>CALL_TYPE</th>\n",
       "      <th>ORIGIN_CALL</th>\n",
       "      <th>ORIGIN_STAND</th>\n",
       "      <th>TAXI_ID</th>\n",
       "      <th>TIMESTAMP</th>\n",
       "      <th>DAY_TYPE</th>\n",
       "      <th>MISSING_DATA</th>\n",
       "      <th>POLYLINE</th>\n",
       "      <th>LATITUDE</th>\n",
       "      <th>LONGITUDE</th>\n",
       "      <th>TARGET</th>\n",
       "      <th>COORD_FEATURES</th>\n",
       "      <th>DAY_OF_WEEK</th>\n",
       "      <th>QUARTER_HOUR</th>\n",
       "      <th>WEEK_OF_YEAR</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1376500052620000184</td>\n",
       "      <td>C</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>115</td>\n",
       "      <td>1376500052</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.649891,41.154399],[-8.649981,41.154417],[...</td>\n",
       "      <td>[-0.0392686, -0.0390627, -0.0440035, -0.049458...</td>\n",
       "      <td>[-0.590024, -0.591592, -0.596627, -0.596793, -...</td>\n",
       "      <td>[-8.61043, 41.1411]</td>\n",
       "      <td>[-0.590024, -0.591592, -0.596627, -0.596793, -...</td>\n",
       "      <td>2</td>\n",
       "      <td>40</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1376500461620000525</td>\n",
       "      <td>C</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>214</td>\n",
       "      <td>1376500461</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.610876,41.145759],[-8.610849,41.145759],[...</td>\n",
       "      <td>[-0.155839, -0.155839, -0.151619, -0.14673, -0...</td>\n",
       "      <td>[0.0920491, 0.0925159, 0.0985014, 0.105587, 0....</td>\n",
       "      <td>[-8.63072, 41.1547]</td>\n",
       "      <td>[0.0920491, 0.0925159, 0.0985014, 0.105587, 0....</td>\n",
       "      <td>2</td>\n",
       "      <td>40</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1376501327620000095</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>11</td>\n",
       "      <td>367</td>\n",
       "      <td>1376501327</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.613243,41.166873],[-8.613252,41.166747],[...</td>\n",
       "      <td>[0.129025, 0.127327, 0.125474, 0.118835, 0.104...</td>\n",
       "      <td>[0.0506678, 0.0505178, 0.0497175, 0.0700247, 0...</td>\n",
       "      <td>[-8.61534, 41.1407]</td>\n",
       "      <td>[0.0506678, 0.0505178, 0.0497175, 0.0700247, 0...</td>\n",
       "      <td>2</td>\n",
       "      <td>41</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1376501783620000173</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "      <td>39</td>\n",
       "      <td>1376501783</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.606988,41.15025],[-8.607213,41.150007],[-...</td>\n",
       "      <td>[-0.0952637, -0.0985575, -0.112865, -0.113843,...</td>\n",
       "      <td>[0.160023, 0.156088, 0.148386, 0.145868, 0.144...</td>\n",
       "      <td>[-8.55426, 41.1628]</td>\n",
       "      <td>[0.160023, 0.156088, 0.148386, 0.145868, 0.144...</td>\n",
       "      <td>2</td>\n",
       "      <td>42</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1376501113620000252</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "      <td>364</td>\n",
       "      <td>1376501113</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.628273,41.157405],[-8.628255,41.157423],[...</td>\n",
       "      <td>[0.00128665, 0.00149252, 0.00236744, 0.0135356...</td>\n",
       "      <td>[-0.212091, -0.211775, -0.209724, -0.20894, -0...</td>\n",
       "      <td>[-8.61928, 41.1786]</td>\n",
       "      <td>[-0.212091, -0.211775, -0.209724, -0.20894, -0...</td>\n",
       "      <td>2</td>\n",
       "      <td>41</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1376501483620000424</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>19</td>\n",
       "      <td>25</td>\n",
       "      <td>1376501483</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.605818,41.153391],[-8.607339,41.153427],[...</td>\n",
       "      <td>[-0.0528556, -0.0523924, -0.0513116, -0.050694...</td>\n",
       "      <td>[0.18048, 0.153888, 0.112506, 0.0797781, 0.071...</td>\n",
       "      <td>[-8.64643, 41.1616]</td>\n",
       "      <td>[0.18048, 0.153888, 0.112506, 0.0797781, 0.071...</td>\n",
       "      <td>2</td>\n",
       "      <td>42</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1376500461620000326</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>14</td>\n",
       "      <td>240</td>\n",
       "      <td>1376500461</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.611137,41.149332],[-8.611263,41.149161],[...</td>\n",
       "      <td>[-0.107667, -0.109931, -0.110086, -0.110086, -...</td>\n",
       "      <td>[0.0874808, 0.08528, 0.0849633, 0.0848132, 0.0...</td>\n",
       "      <td>[-8.61446, 41.1422]</td>\n",
       "      <td>[0.0874808, 0.08528, 0.0849633, 0.0848132, 0.0...</td>\n",
       "      <td>2</td>\n",
       "      <td>40</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1376500453620000263</td>\n",
       "      <td>C</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>407</td>\n",
       "      <td>1376500453</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.586396,41.149224],[-8.586378,41.149026],[...</td>\n",
       "      <td>[-0.109108, -0.111784, -0.11199, -0.107873, -0...</td>\n",
       "      <td>[0.520016, 0.520333, 0.513247, 0.49249, 0.4643...</td>\n",
       "      <td>[-8.58591, 41.1486]</td>\n",
       "      <td>[0.520016, 0.520333, 0.513247, 0.49249, 0.4643...</td>\n",
       "      <td>2</td>\n",
       "      <td>40</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1376499820620000467</td>\n",
       "      <td>C</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>270</td>\n",
       "      <td>1376499820</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.625177,41.157333],[-8.625609,41.157405],[...</td>\n",
       "      <td>[0.000308796, 0.00128665, 0.00494074, 0.006021...</td>\n",
       "      <td>[-0.157972, -0.165525, -0.194935, -0.202171, -...</td>\n",
       "      <td>[-8.64726, 41.1732]</td>\n",
       "      <td>[-0.157972, -0.165525, -0.194935, -0.202171, -...</td>\n",
       "      <td>2</td>\n",
       "      <td>40</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1376503568620000213</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>28</td>\n",
       "      <td>431</td>\n",
       "      <td>1376503568</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.584335,41.163111],[-8.585127,41.162922],[...</td>\n",
       "      <td>[0.0782799, 0.0757066, 0.0835809, 0.0913522, 0...</td>\n",
       "      <td>[0.556046, 0.542208, 0.51058, 0.479736, 0.4769...</td>\n",
       "      <td>[-8.58525, 41.1689]</td>\n",
       "      <td>[0.556046, 0.542208, 0.51058, 0.479736, 0.4769...</td>\n",
       "      <td>2</td>\n",
       "      <td>44</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>1376503240620000002</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>63</td>\n",
       "      <td>421</td>\n",
       "      <td>1376503240</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.609688,41.160348],[-8.609967,41.159277],[...</td>\n",
       "      <td>[0.040967, 0.0265565, 0.00370556, 0.000669059,...</td>\n",
       "      <td>[0.112823, 0.107938, 0.107938, 0.107471, 0.106...</td>\n",
       "      <td>[-8.61071, 41.1456]</td>\n",
       "      <td>[0.112823, 0.107938, 0.107938, 0.107471, 0.106...</td>\n",
       "      <td>2</td>\n",
       "      <td>44</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>1376504312620000617</td>\n",
       "      <td>C</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>199</td>\n",
       "      <td>1376504312</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.624502,41.179554],[-8.624511,41.179527],[...</td>\n",
       "      <td>[0.300099, 0.299738, 0.299738, 0.299841, 0.299...</td>\n",
       "      <td>[-0.146168, -0.146318, -0.146485, -0.146318, -...</td>\n",
       "      <td>[-8.62455, 41.1796]</td>\n",
       "      <td>[-0.146168, -0.146318, -0.146485, -0.146318, -...</td>\n",
       "      <td>2</td>\n",
       "      <td>45</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>1376502661620000400</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>29</td>\n",
       "      <td>117</td>\n",
       "      <td>1376502661</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.638443,41.170797],[-8.6382,41.170716],[-8...</td>\n",
       "      <td>[0.181932, 0.180852, 0.184866, 0.192174, 0.200...</td>\n",
       "      <td>[-0.389887, -0.385636, -0.36046, -0.330883, -0...</td>\n",
       "      <td>[-8.6206, 41.1739]</td>\n",
       "      <td>[-0.389887, -0.385636, -0.36046, -0.330883, -0...</td>\n",
       "      <td>2</td>\n",
       "      <td>43</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>1376500537620000246</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "      <td>318</td>\n",
       "      <td>1376500537</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.628147,41.157198],[-8.628156,41.157198],[...</td>\n",
       "      <td>[-0.00149252, -0.00149252, -0.00128665, -0.001...</td>\n",
       "      <td>[-0.209891, -0.210041, -0.20879, -0.208473, -0...</td>\n",
       "      <td>[-8.61782, 41.1525]</td>\n",
       "      <td>[-0.209891, -0.210041, -0.20879, -0.208473, -0...</td>\n",
       "      <td>2</td>\n",
       "      <td>41</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>1376502120620000557</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>32</td>\n",
       "      <td>245</td>\n",
       "      <td>1376502120</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.627643,41.157765],[-8.627958,41.1579],[-8...</td>\n",
       "      <td>[0.00612446, 0.00797724, 0.0135356, 0.0206894,...</td>\n",
       "      <td>[-0.201071, -0.206589, -0.20879, -0.228147, -0...</td>\n",
       "      <td>[-8.61148, 41.1461]</td>\n",
       "      <td>[-0.201071, -0.206589, -0.20879, -0.228147, -0...</td>\n",
       "      <td>2</td>\n",
       "      <td>42</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>1376496951620000012</td>\n",
       "      <td>A</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>79</td>\n",
       "      <td>1376496951</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.604045,41.182569],[-8.604135,41.182353],[...</td>\n",
       "      <td>[0.340757, 0.337875, 0.316876, 0.295724, 0.278...</td>\n",
       "      <td>[0.211474, 0.209907, 0.197003, 0.183148, 0.161...</td>\n",
       "      <td>[-8.62064, 41.1643]</td>\n",
       "      <td>[0.211474, 0.209907, 0.197003, 0.183148, 0.161...</td>\n",
       "      <td>2</td>\n",
       "      <td>37</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>1376501723620000554</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>53</td>\n",
       "      <td>183</td>\n",
       "      <td>1376501723</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.613945,41.141277],[-8.613972,41.141286],[...</td>\n",
       "      <td>[-0.216312, -0.216209, -0.221047, -0.222642, -...</td>\n",
       "      <td>[0.0383969, 0.03793, 0.0220411, 0.0168393, 0.0...</td>\n",
       "      <td>[-8.63607, 41.1592]</td>\n",
       "      <td>[0.0383969, 0.03793, 0.0220411, 0.0168393, 0.0...</td>\n",
       "      <td>2</td>\n",
       "      <td>42</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>1376503551620000376</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>34</td>\n",
       "      <td>246</td>\n",
       "      <td>1376503551</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.615556,41.14071],[-8.615565,41.140692],[-...</td>\n",
       "      <td>[-0.22398, -0.224186, -0.22434, -0.22362, -0.2...</td>\n",
       "      <td>[0.0102369, 0.0100702, 0.0100702, 0.010387, 0....</td>\n",
       "      <td>[-8.64072, 41.1612]</td>\n",
       "      <td>[0.0102369, 0.0100702, 0.0100702, 0.010387, 0....</td>\n",
       "      <td>2</td>\n",
       "      <td>44</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>1376504171620000146</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "      <td>338</td>\n",
       "      <td>1376504171</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.606979,41.150268],[-8.607285,41.150124],[...</td>\n",
       "      <td>[-0.0950063, -0.096962, -0.0962415, -0.0962415...</td>\n",
       "      <td>[0.160173, 0.154838, 0.148852, 0.148536, 0.128...</td>\n",
       "      <td>[-8.61805, 41.1525]</td>\n",
       "      <td>[0.160173, 0.154838, 0.148852, 0.148536, 0.128...</td>\n",
       "      <td>2</td>\n",
       "      <td>45</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>1376506047620000026</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>57</td>\n",
       "      <td>167</td>\n",
       "      <td>1376506047</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.610804,41.145741],[-8.610822,41.145768],[...</td>\n",
       "      <td>[-0.156097, -0.155736, -0.155839, -0.151722, -...</td>\n",
       "      <td>[0.0933162, 0.0929994, 0.0917323, 0.0961339, 0...</td>\n",
       "      <td>[-8.60417, 41.1489]</td>\n",
       "      <td>[0.0933162, 0.0929994, 0.0917323, 0.0961339, 0...</td>\n",
       "      <td>2</td>\n",
       "      <td>47</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>1376505311620000392</td>\n",
       "      <td>A</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>349</td>\n",
       "      <td>1376505311</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.583165,41.164713],[-8.583012,41.164407],[...</td>\n",
       "      <td>[0.0998956, 0.0957268, 0.0964474, 0.105557, 0....</td>\n",
       "      <td>[0.576503, 0.579187, 0.580438, 0.580121, 0.597...</td>\n",
       "      <td>[-8.6118, 41.1429]</td>\n",
       "      <td>[0.576503, 0.579187, 0.580438, 0.580121, 0.597...</td>\n",
       "      <td>2</td>\n",
       "      <td>46</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>1376505833620000120</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "      <td>144</td>\n",
       "      <td>1376505833</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.628345,41.15763],[-8.628345,41.157576],[-...</td>\n",
       "      <td>[0.00432315, 0.00360262, 0.00504367, 0.0026247...</td>\n",
       "      <td>[-0.213342, -0.213342, -0.206906, -0.178896, -...</td>\n",
       "      <td>[-8.61802, 41.1501]</td>\n",
       "      <td>[-0.213342, -0.213342, -0.206906, -0.178896, -...</td>\n",
       "      <td>2</td>\n",
       "      <td>46</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>1376506874620000255</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>33</td>\n",
       "      <td>194</td>\n",
       "      <td>1376506874</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.600184,41.182686],[-8.600031,41.182758],[...</td>\n",
       "      <td>[0.342352, 0.34333, 0.33736, 0.335559, 0.33314...</td>\n",
       "      <td>[0.278965, 0.281649, 0.310276, 0.319096, 0.341...</td>\n",
       "      <td>[-8.56627, 41.1814]</td>\n",
       "      <td>[0.278965, 0.281649, 0.310276, 0.319096, 0.341...</td>\n",
       "      <td>2</td>\n",
       "      <td>48</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>1376503763620000015</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>60</td>\n",
       "      <td>48</td>\n",
       "      <td>1376503763</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.609706,41.151276],[-8.609679,41.151294],[...</td>\n",
       "      <td>[-0.0814193, -0.081162, -0.0792063, -0.0644355...</td>\n",
       "      <td>[0.112506, 0.112973, 0.115491, 0.107788, 0.108...</td>\n",
       "      <td>[-8.61818, 41.1696]</td>\n",
       "      <td>[0.112506, 0.112973, 0.115491, 0.107788, 0.108...</td>\n",
       "      <td>2</td>\n",
       "      <td>44</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>1376501181620000360</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>37</td>\n",
       "      <td>1376501181</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.598996,41.149026],[-8.598843,41.148873],[...</td>\n",
       "      <td>[-0.111784, -0.113843, -0.115284, -0.119195, -...</td>\n",
       "      <td>[0.299739, 0.302423, 0.303357, 0.305408, 0.304...</td>\n",
       "      <td>[-8.60023, 41.1493]</td>\n",
       "      <td>[0.299739, 0.302423, 0.303357, 0.305408, 0.304...</td>\n",
       "      <td>2</td>\n",
       "      <td>41</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>1376504563620000017</td>\n",
       "      <td>A</td>\n",
       "      <td>954</td>\n",
       "      <td>0</td>\n",
       "      <td>335</td>\n",
       "      <td>1376504563</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.618022,41.151519],[-8.618337,41.151447],[...</td>\n",
       "      <td>[-0.0781255, -0.0791033, -0.0844558, -0.087132...</td>\n",
       "      <td>[-0.0328782, -0.0383802, -0.0624553, -0.079444...</td>\n",
       "      <td>[-8.59822, 41.1484]</td>\n",
       "      <td>[-0.0328782, -0.0383802, -0.0624553, -0.079444...</td>\n",
       "      <td>2</td>\n",
       "      <td>45</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>1376507238620000114</td>\n",
       "      <td>C</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>165</td>\n",
       "      <td>1376507238</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.63028,41.157432],[-8.630505,41.157153],[-...</td>\n",
       "      <td>[0.00164691, -0.00211011, -0.00452901, 0.01085...</td>\n",
       "      <td>[-0.24717, -0.251105, -0.271246, -0.289819, -0...</td>\n",
       "      <td>[-8.65056, 41.1615]</td>\n",
       "      <td>[-0.24717, -0.251105, -0.271246, -0.289819, -0...</td>\n",
       "      <td>2</td>\n",
       "      <td>48</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>1376501378620000195</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>60</td>\n",
       "      <td>67</td>\n",
       "      <td>1376501378</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.609499,41.151294],[-8.609535,41.151312],[...</td>\n",
       "      <td>[-0.081162, -0.0809046, -0.0778681, -0.0758095...</td>\n",
       "      <td>[0.116124, 0.115491, 0.117375, 0.111556, 0.100...</td>\n",
       "      <td>[-8.61674, 41.137]</td>\n",
       "      <td>[0.116124, 0.115491, 0.117375, 0.111556, 0.100...</td>\n",
       "      <td>2</td>\n",
       "      <td>41</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>1376506638620000038</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "      <td>140</td>\n",
       "      <td>1376506638</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.632323,41.164326],[-8.632917,41.164065],[...</td>\n",
       "      <td>[0.0946461, 0.0911464, 0.0867718, 0.093205, 0....</td>\n",
       "      <td>[-0.2829, -0.293287, -0.317345, -0.346305, -0....</td>\n",
       "      <td>[-8.65428, 41.181]</td>\n",
       "      <td>[-0.2829, -0.293287, -0.317345, -0.346305, -0....</td>\n",
       "      <td>2</td>\n",
       "      <td>47</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>1376504586620000608</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>18</td>\n",
       "      <td>310</td>\n",
       "      <td>1376504586</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.619921,41.148018],[-8.620218,41.147712],[...</td>\n",
       "      <td>[-0.125371, -0.129489, -0.1176, -0.104013, -0....</td>\n",
       "      <td>[-0.0660733, -0.0712751, -0.0792946, -0.084179...</td>\n",
       "      <td>[-8.61061, 41.1515]</td>\n",
       "      <td>[-0.0660733, -0.0712751, -0.0792946, -0.084179...</td>\n",
       "      <td>2</td>\n",
       "      <td>45</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>274</th>\n",
       "      <td>1387725593620000440</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>42</td>\n",
       "      <td>233</td>\n",
       "      <td>1387725593</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.612145,41.172777],[-8.612568,41.172768],[...</td>\n",
       "      <td>[0.208643, 0.20854, 0.216569, 0.226142, 0.2362...</td>\n",
       "      <td>[0.0698579, 0.062472, 0.0542858, 0.0513014, 0....</td>\n",
       "      <td>[-8.58568, 41.1489]</td>\n",
       "      <td>[0.0698579, 0.062472, 0.0542858, 0.0513014, 0....</td>\n",
       "      <td>6</td>\n",
       "      <td>29</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>275</th>\n",
       "      <td>1387726426620000621</td>\n",
       "      <td>A</td>\n",
       "      <td>1602</td>\n",
       "      <td>0</td>\n",
       "      <td>34</td>\n",
       "      <td>1387726426</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.648964,41.179752],[-8.648982,41.179752],[...</td>\n",
       "      <td>[0.302775, 0.302775, 0.314663, 0.316104, 0.316...</td>\n",
       "      <td>[-0.573819, -0.574136, -0.57807, -0.576336, -0...</td>\n",
       "      <td>[-8.63323, 41.1756]</td>\n",
       "      <td>[-0.573819, -0.574136, -0.57807, -0.576336, -0...</td>\n",
       "      <td>6</td>\n",
       "      <td>30</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>276</th>\n",
       "      <td>1387728068620000012</td>\n",
       "      <td>A</td>\n",
       "      <td>3521</td>\n",
       "      <td>0</td>\n",
       "      <td>79</td>\n",
       "      <td>1387728068</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.658126,41.154876],[-8.657829,41.154579],[...</td>\n",
       "      <td>[-0.0328353, -0.0368497, -0.0219245, -0.001286...</td>\n",
       "      <td>[-0.733992, -0.728807, -0.743912, -0.769238, -...</td>\n",
       "      <td>[-8.65425, 41.1809]</td>\n",
       "      <td>[-0.733992, -0.728807, -0.743912, -0.769238, -...</td>\n",
       "      <td>6</td>\n",
       "      <td>32</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>277</th>\n",
       "      <td>1387728077620000502</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>54</td>\n",
       "      <td>116</td>\n",
       "      <td>1387728077</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.630316,41.15754],[-8.629668,41.157],[-8.6...</td>\n",
       "      <td>[0.00308796, -0.00416875, -0.00844043, -0.0272...</td>\n",
       "      <td>[-0.247804, -0.236483, -0.233182, -0.24497, -0...</td>\n",
       "      <td>[-8.6304, 41.1554]</td>\n",
       "      <td>[-0.247804, -0.236483, -0.233182, -0.24497, -0...</td>\n",
       "      <td>6</td>\n",
       "      <td>32</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>278</th>\n",
       "      <td>1387729770620000384</td>\n",
       "      <td>A</td>\n",
       "      <td>3184</td>\n",
       "      <td>0</td>\n",
       "      <td>225</td>\n",
       "      <td>1387729770</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.6121,41.158674],[-8.6121,41.158674],[-8.6...</td>\n",
       "      <td>[0.0183734, 0.0183734, 0.00452901, -0.0170353,...</td>\n",
       "      <td>[0.0706582, 0.0706582, 0.064356, 0.0511347, 0....</td>\n",
       "      <td>[-8.62106, 41.151]</td>\n",
       "      <td>[0.0706582, 0.0706582, 0.064356, 0.0511347, 0....</td>\n",
       "      <td>6</td>\n",
       "      <td>33</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>279</th>\n",
       "      <td>1387728089620000640</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>26</td>\n",
       "      <td>218</td>\n",
       "      <td>1387728089</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.580204,41.15934],[-8.580627,41.159241],[-...</td>\n",
       "      <td>[0.0273799, 0.0260418, 0.015131, -0.005301, -0...</td>\n",
       "      <td>[0.628271, 0.620869, 0.633156, 0.637241, 0.637...</td>\n",
       "      <td>[-8.58601, 41.1486]</td>\n",
       "      <td>[0.628271, 0.620869, 0.633156, 0.637241, 0.637...</td>\n",
       "      <td>6</td>\n",
       "      <td>32</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>280</th>\n",
       "      <td>1387727123620000055</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>352</td>\n",
       "      <td>1387727123</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.63991,41.15979],[-8.640693,41.159664],[-8...</td>\n",
       "      <td>[0.0334529, 0.0317546, 0.00586713, -0.0161089,...</td>\n",
       "      <td>[-0.41553, -0.429218, -0.438821, -0.447324, -0...</td>\n",
       "      <td>[-8.6178, 41.1471]</td>\n",
       "      <td>[-0.41553, -0.429218, -0.438821, -0.447324, -0...</td>\n",
       "      <td>6</td>\n",
       "      <td>31</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>281</th>\n",
       "      <td>1387728500620000271</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>57</td>\n",
       "      <td>234</td>\n",
       "      <td>1387728500</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.610885,41.14566],[-8.610885,41.145669],[-...</td>\n",
       "      <td>[-0.157177, -0.157074, -0.157435, -0.156714, -...</td>\n",
       "      <td>[0.091899, 0.091899, 0.0917323, 0.0923659, 0.0...</td>\n",
       "      <td>[-8.66138, 41.1481]</td>\n",
       "      <td>[0.091899, 0.091899, 0.0917323, 0.0923659, 0.0...</td>\n",
       "      <td>6</td>\n",
       "      <td>32</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>282</th>\n",
       "      <td>1387729808620000151</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>28</td>\n",
       "      <td>146</td>\n",
       "      <td>1387729808</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.584335,41.163156],[-8.584425,41.163102],[...</td>\n",
       "      <td>[0.078846, 0.0781255, 0.0776623, 0.0841985, 0....</td>\n",
       "      <td>[0.556046, 0.554479, 0.533088, 0.506012, 0.478...</td>\n",
       "      <td>[-8.6117, 41.16]</td>\n",
       "      <td>[0.556046, 0.554479, 0.533088, 0.506012, 0.478...</td>\n",
       "      <td>6</td>\n",
       "      <td>34</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>283</th>\n",
       "      <td>1387660057620000026</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>57</td>\n",
       "      <td>167</td>\n",
       "      <td>1387660057</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.610768,41.145642],[-8.610759,41.145642],[...</td>\n",
       "      <td>[-0.157435, -0.157435, -0.157074, -0.156354, -...</td>\n",
       "      <td>[0.0939331, 0.0940998, 0.0942499, 0.0939331, 0...</td>\n",
       "      <td>[-8.63085, 41.1466]</td>\n",
       "      <td>[0.0939331, 0.0940998, 0.0942499, 0.0939331, 0...</td>\n",
       "      <td>5</td>\n",
       "      <td>52</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>284</th>\n",
       "      <td>1387727477620000513</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>53</td>\n",
       "      <td>366</td>\n",
       "      <td>1387727477</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.613972,41.141349],[-8.613963,41.141349],[...</td>\n",
       "      <td>[-0.215334, -0.215334, -0.216929, -0.205607, -...</td>\n",
       "      <td>[0.03793, 0.0380801, 0.029277, 0.0308442, 0.03...</td>\n",
       "      <td>[-8.61403, 41.1499]</td>\n",
       "      <td>[0.03793, 0.0380801, 0.029277, 0.0308442, 0.03...</td>\n",
       "      <td>6</td>\n",
       "      <td>31</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>285</th>\n",
       "      <td>1387725100620000157</td>\n",
       "      <td>A</td>\n",
       "      <td>254</td>\n",
       "      <td>0</td>\n",
       "      <td>390</td>\n",
       "      <td>1387725100</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.676234,41.15484],[-8.676198,41.154822],[-...</td>\n",
       "      <td>[-0.03335, -0.0335559, -0.0335559, -0.0334529,...</td>\n",
       "      <td>[-1.05057, -1.04994, -1.04994, -1.04962, -1.04...</td>\n",
       "      <td>[-8.6488, 41.1486]</td>\n",
       "      <td>[-1.05057, -1.04994, -1.04994, -1.04962, -1.04...</td>\n",
       "      <td>6</td>\n",
       "      <td>28</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>286</th>\n",
       "      <td>1387731453620000032</td>\n",
       "      <td>A</td>\n",
       "      <td>9559</td>\n",
       "      <td>0</td>\n",
       "      <td>371</td>\n",
       "      <td>1387731453</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.657946,41.148234],[-8.657937,41.148207],[...</td>\n",
       "      <td>[-0.122438, -0.122798, -0.122798, -0.122695, -...</td>\n",
       "      <td>[-0.730841, -0.730691, -0.730691, -0.730541, -...</td>\n",
       "      <td>[-8.65648, 41.1532]</td>\n",
       "      <td>[-0.730841, -0.730691, -0.730691, -0.730541, -...</td>\n",
       "      <td>6</td>\n",
       "      <td>35</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>287</th>\n",
       "      <td>1387730991620000217</td>\n",
       "      <td>A</td>\n",
       "      <td>20908</td>\n",
       "      <td>0</td>\n",
       "      <td>321</td>\n",
       "      <td>1387730991</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.569818,41.170158],[-8.569278,41.169996],[...</td>\n",
       "      <td>[0.173338, 0.171125, 0.158052, 0.160934, 0.166...</td>\n",
       "      <td>[0.809852, 0.819288, 0.845881, 0.854534, 0.858...</td>\n",
       "      <td>[-8.572, 41.1629]</td>\n",
       "      <td>[0.809852, 0.819288, 0.845881, 0.854534, 0.858...</td>\n",
       "      <td>6</td>\n",
       "      <td>35</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>288</th>\n",
       "      <td>1387723778620000364</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>419</td>\n",
       "      <td>1387723778</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.628867,41.160996],[-8.628849,41.160951],[...</td>\n",
       "      <td>[0.0497162, 0.0490986, 0.0543481, 0.0718466, 0...</td>\n",
       "      <td>[-0.222478, -0.222162, -0.209724, -0.202021, -...</td>\n",
       "      <td>[-8.71435, 41.2082]</td>\n",
       "      <td>[-0.222478, -0.222162, -0.209724, -0.202021, -...</td>\n",
       "      <td>6</td>\n",
       "      <td>27</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>289</th>\n",
       "      <td>1387731647620000129</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>57</td>\n",
       "      <td>265</td>\n",
       "      <td>1387731647</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.610759,41.145651],[-8.610768,41.145678],[...</td>\n",
       "      <td>[-0.15728, -0.15692, -0.155839, -0.154244, -0....</td>\n",
       "      <td>[0.0940998, 0.0939331, 0.0936163, 0.0936163, 0...</td>\n",
       "      <td>[-8.63835, 41.1592]</td>\n",
       "      <td>[0.0940998, 0.0939331, 0.0936163, 0.0936163, 0...</td>\n",
       "      <td>6</td>\n",
       "      <td>36</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>290</th>\n",
       "      <td>1387733802620000364</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>419</td>\n",
       "      <td>1387733802</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.628786,41.161041],[-8.628579,41.160897],[...</td>\n",
       "      <td>[0.0503338, 0.0483781, 0.0476576, 0.0474002, 0...</td>\n",
       "      <td>[-0.221061, -0.217443, -0.21776, -0.21791, -0....</td>\n",
       "      <td>[-8.596, 41.1696]</td>\n",
       "      <td>[-0.221061, -0.217443, -0.21776, -0.21791, -0....</td>\n",
       "      <td>6</td>\n",
       "      <td>38</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>291</th>\n",
       "      <td>1387731776620000207</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>36</td>\n",
       "      <td>211</td>\n",
       "      <td>1387731776</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.649423,41.154345],[-8.6499,41.154273],[-8...</td>\n",
       "      <td>[-0.0399891, -0.040967, -0.0452387, -0.0437976...</td>\n",
       "      <td>[-0.581838, -0.590191, -0.59616, -0.579487, -0...</td>\n",
       "      <td>[-8.57125, 41.1646]</td>\n",
       "      <td>[-0.581838, -0.590191, -0.59616, -0.579487, -0...</td>\n",
       "      <td>6</td>\n",
       "      <td>36</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>292</th>\n",
       "      <td>1387729265620000068</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>185</td>\n",
       "      <td>1387729265</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.608779,41.147793],[-8.608734,41.147802],[...</td>\n",
       "      <td>[-0.128408, -0.128305, -0.128305, -0.128408, -...</td>\n",
       "      <td>[0.128712, 0.129496, 0.129812, 0.133114, 0.133...</td>\n",
       "      <td>[-8.62051, 41.1651]</td>\n",
       "      <td>[0.128712, 0.129496, 0.129812, 0.133114, 0.133...</td>\n",
       "      <td>6</td>\n",
       "      <td>33</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>293</th>\n",
       "      <td>1387735526620000023</td>\n",
       "      <td>C</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>404</td>\n",
       "      <td>1387735526</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.597673,41.142681],[-8.597682,41.142681]]</td>\n",
       "      <td>[-0.197372, -0.197372]</td>\n",
       "      <td>[0.322864, 0.322714]</td>\n",
       "      <td>[-8.59768, 41.1427]</td>\n",
       "      <td>[0.322864, 0.322864, 0.322864, 0.322864, 0.322...</td>\n",
       "      <td>6</td>\n",
       "      <td>40</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>294</th>\n",
       "      <td>1387713713620000255</td>\n",
       "      <td>A</td>\n",
       "      <td>34988</td>\n",
       "      <td>0</td>\n",
       "      <td>194</td>\n",
       "      <td>1387713713</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.594352,41.169375],[-8.594352,41.169375],[...</td>\n",
       "      <td>[0.162787, 0.162787, 0.16289, 0.162993, 0.1631...</td>\n",
       "      <td>[0.380934, 0.380934, 0.381084, 0.381084, 0.381...</td>\n",
       "      <td>[-8.58298, 41.1704]</td>\n",
       "      <td>[0.380934, 0.380934, 0.381084, 0.381084, 0.381...</td>\n",
       "      <td>6</td>\n",
       "      <td>16</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>295</th>\n",
       "      <td>1387735341620000216</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>331</td>\n",
       "      <td>1387735341</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.630766,41.154948],[-8.631414,41.15439],[-...</td>\n",
       "      <td>[-0.0318575, -0.039423, -0.054554, -0.0752434,...</td>\n",
       "      <td>[-0.255673, -0.267011, -0.283683, -0.29422, -0...</td>\n",
       "      <td>[-8.63564, 41.1406]</td>\n",
       "      <td>[-0.255673, -0.267011, -0.283683, -0.29422, -0...</td>\n",
       "      <td>6</td>\n",
       "      <td>40</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>296</th>\n",
       "      <td>1387731258620000486</td>\n",
       "      <td>C</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>75</td>\n",
       "      <td>1387731258</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.59698,41.171328],[-8.595054,41.172327],[-...</td>\n",
       "      <td>[0.189138, 0.20257, 0.253367, 0.308848, 0.3575...</td>\n",
       "      <td>[0.334985, 0.368663, 0.395873, 0.406426, 0.397...</td>\n",
       "      <td>[-8.33168, 41.2035]</td>\n",
       "      <td>[0.334985, 0.368663, 0.395873, 0.406426, 0.397...</td>\n",
       "      <td>6</td>\n",
       "      <td>35</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>297</th>\n",
       "      <td>1387737095620000217</td>\n",
       "      <td>A</td>\n",
       "      <td>495</td>\n",
       "      <td>0</td>\n",
       "      <td>321</td>\n",
       "      <td>1387737095</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.591688,41.159556],[-8.591625,41.159421],[...</td>\n",
       "      <td>[0.0303135, 0.0284607, 0.0216672, 0.0165721, 0...</td>\n",
       "      <td>[0.427501, 0.428601, 0.428134, 0.413496, 0.402...</td>\n",
       "      <td>[-8.60578, 41.1498]</td>\n",
       "      <td>[0.427501, 0.428601, 0.428134, 0.413496, 0.402...</td>\n",
       "      <td>6</td>\n",
       "      <td>42</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>298</th>\n",
       "      <td>1387737450620000384</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>52</td>\n",
       "      <td>225</td>\n",
       "      <td>1387737450</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.61327,41.154453],[-8.613297,41.154147],[-...</td>\n",
       "      <td>[-0.0385481, -0.0426654, -0.0465768, -0.047657...</td>\n",
       "      <td>[0.050201, 0.0497175, 0.0495675, 0.0564866, 0....</td>\n",
       "      <td>[-8.58762, 41.1885]</td>\n",
       "      <td>[0.050201, 0.0497175, 0.0495675, 0.0564866, 0....</td>\n",
       "      <td>6</td>\n",
       "      <td>42</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>299</th>\n",
       "      <td>1387740537620000657</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>47</td>\n",
       "      <td>17</td>\n",
       "      <td>1387740537</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.654796,41.173551],[-8.654526,41.173668],[...</td>\n",
       "      <td>[0.219091, 0.220686, 0.236486, 0.239369, 0.233...</td>\n",
       "      <td>[-0.675771, -0.671053, -0.652646, -0.632039, -...</td>\n",
       "      <td>[-8.63023, 41.1584]</td>\n",
       "      <td>[-0.675771, -0.671053, -0.652646, -0.632039, -...</td>\n",
       "      <td>6</td>\n",
       "      <td>45</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>300</th>\n",
       "      <td>1387742161620000503</td>\n",
       "      <td>C</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>33</td>\n",
       "      <td>1387742161</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.639487,41.167422],[-8.639424,41.16753],[-...</td>\n",
       "      <td>[0.136436, 0.137878, 0.135819, 0.12393, 0.1178...</td>\n",
       "      <td>[-0.408144, -0.407043, -0.402008, -0.397757, -...</td>\n",
       "      <td>[-8.66577, 41.2102]</td>\n",
       "      <td>[-0.408144, -0.407043, -0.402008, -0.397757, -...</td>\n",
       "      <td>6</td>\n",
       "      <td>47</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>301</th>\n",
       "      <td>1387755659620000372</td>\n",
       "      <td>A</td>\n",
       "      <td>481</td>\n",
       "      <td>0</td>\n",
       "      <td>27</td>\n",
       "      <td>1387755659</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.679753,41.156559],[-8.679717,41.156568],[...</td>\n",
       "      <td>[-0.0101388, -0.0100359, -0.00308796, -0.00710...</td>\n",
       "      <td>[-1.11209, -1.11146, -1.0954, -1.07763, -1.058...</td>\n",
       "      <td>[-8.61165, 41.1461]</td>\n",
       "      <td>[-1.11209, -1.11146, -1.0954, -1.07763, -1.058...</td>\n",
       "      <td>6</td>\n",
       "      <td>62</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>302</th>\n",
       "      <td>1387735327620000068</td>\n",
       "      <td>B</td>\n",
       "      <td>0</td>\n",
       "      <td>27</td>\n",
       "      <td>185</td>\n",
       "      <td>1387735327</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.608707,41.147811],[-8.608689,41.147829],[...</td>\n",
       "      <td>[-0.12815, -0.127945, -0.128665, -0.13304, -0....</td>\n",
       "      <td>[0.129962, 0.130279, 0.13328, 0.129812, 0.1073...</td>\n",
       "      <td>[-8.62782, 41.1698]</td>\n",
       "      <td>[0.129962, 0.130279, 0.13328, 0.129812, 0.1073...</td>\n",
       "      <td>6</td>\n",
       "      <td>40</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>303</th>\n",
       "      <td>1387788528620000010</td>\n",
       "      <td>A</td>\n",
       "      <td>8312</td>\n",
       "      <td>0</td>\n",
       "      <td>26</td>\n",
       "      <td>1387788528</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>[[-8.609247,41.155182],[-8.60922,41.155254],[-...</td>\n",
       "      <td>[-0.0287181, -0.0277402, -0.0210496, -0.021409...</td>\n",
       "      <td>[0.120526, 0.121009, 0.117541, 0.108105, 0.106...</td>\n",
       "      <td>[-8.61635, 41.163]</td>\n",
       "      <td>[0.120526, 0.121009, 0.117541, 0.108105, 0.106...</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>52</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>304 rows × 16 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                 TRIP_ID CALL_TYPE ORIGIN_CALL ORIGIN_STAND TAXI_ID  \\\n",
       "0    1376500052620000184         C           0            0     115   \n",
       "1    1376500461620000525         C           0            0     214   \n",
       "2    1376501327620000095         B           0           11     367   \n",
       "3    1376501783620000173         B           0           10      39   \n",
       "4    1376501113620000252         B           0           13     364   \n",
       "5    1376501483620000424         B           0           19      25   \n",
       "6    1376500461620000326         B           0           14     240   \n",
       "7    1376500453620000263         C           0            0     407   \n",
       "8    1376499820620000467         C           0            0     270   \n",
       "9    1376503568620000213         B           0           28     431   \n",
       "10   1376503240620000002         B           0           63     421   \n",
       "11   1376504312620000617         C           0            0     199   \n",
       "12   1376502661620000400         B           0           29     117   \n",
       "13   1376500537620000246         B           0           13     318   \n",
       "14   1376502120620000557         B           0           32     245   \n",
       "15   1376496951620000012         A           7            0      79   \n",
       "16   1376501723620000554         B           0           53     183   \n",
       "17   1376503551620000376         B           0           34     246   \n",
       "18   1376504171620000146         B           0           10     338   \n",
       "19   1376506047620000026         B           0           57     167   \n",
       "20   1376505311620000392         A           7            0     349   \n",
       "21   1376505833620000120         B           0           13     144   \n",
       "22   1376506874620000255         B           0           33     194   \n",
       "23   1376503763620000015         B           0           60      48   \n",
       "24   1376501181620000360         B           0            0      37   \n",
       "25   1376504563620000017         A         954            0     335   \n",
       "26   1376507238620000114         C           0            0     165   \n",
       "27   1376501378620000195         B           0           60      67   \n",
       "28   1376506638620000038         B           0           17     140   \n",
       "29   1376504586620000608         B           0           18     310   \n",
       "..                   ...       ...         ...          ...     ...   \n",
       "274  1387725593620000440         B           0           42     233   \n",
       "275  1387726426620000621         A        1602            0      34   \n",
       "276  1387728068620000012         A        3521            0      79   \n",
       "277  1387728077620000502         B           0           54     116   \n",
       "278  1387729770620000384         A        3184            0     225   \n",
       "279  1387728089620000640         B           0           26     218   \n",
       "280  1387727123620000055         B           0            7     352   \n",
       "281  1387728500620000271         B           0           57     234   \n",
       "282  1387729808620000151         B           0           28     146   \n",
       "283  1387660057620000026         B           0           57     167   \n",
       "284  1387727477620000513         B           0           53     366   \n",
       "285  1387725100620000157         A         254            0     390   \n",
       "286  1387731453620000032         A        9559            0     371   \n",
       "287  1387730991620000217         A       20908            0     321   \n",
       "288  1387723778620000364         B           0           21     419   \n",
       "289  1387731647620000129         B           0           57     265   \n",
       "290  1387733802620000364         B           0           21     419   \n",
       "291  1387731776620000207         B           0           36     211   \n",
       "292  1387729265620000068         B           0            0     185   \n",
       "293  1387735526620000023         C           0            0     404   \n",
       "294  1387713713620000255         A       34988            0     194   \n",
       "295  1387735341620000216         B           0           12     331   \n",
       "296  1387731258620000486         C           0            0      75   \n",
       "297  1387737095620000217         A         495            0     321   \n",
       "298  1387737450620000384         B           0           52     225   \n",
       "299  1387740537620000657         B           0           47      17   \n",
       "300  1387742161620000503         C           0            0      33   \n",
       "301  1387755659620000372         A         481            0      27   \n",
       "302  1387735327620000068         B           0           27     185   \n",
       "303  1387788528620000010         A        8312            0      26   \n",
       "\n",
       "      TIMESTAMP DAY_TYPE MISSING_DATA  \\\n",
       "0    1376500052        0        False   \n",
       "1    1376500461        0        False   \n",
       "2    1376501327        0        False   \n",
       "3    1376501783        0        False   \n",
       "4    1376501113        0        False   \n",
       "5    1376501483        0        False   \n",
       "6    1376500461        0        False   \n",
       "7    1376500453        0        False   \n",
       "8    1376499820        0        False   \n",
       "9    1376503568        0        False   \n",
       "10   1376503240        0        False   \n",
       "11   1376504312        0        False   \n",
       "12   1376502661        0        False   \n",
       "13   1376500537        0        False   \n",
       "14   1376502120        0        False   \n",
       "15   1376496951        0        False   \n",
       "16   1376501723        0        False   \n",
       "17   1376503551        0        False   \n",
       "18   1376504171        0        False   \n",
       "19   1376506047        0        False   \n",
       "20   1376505311        0        False   \n",
       "21   1376505833        0        False   \n",
       "22   1376506874        0        False   \n",
       "23   1376503763        0        False   \n",
       "24   1376501181        0        False   \n",
       "25   1376504563        0        False   \n",
       "26   1376507238        0        False   \n",
       "27   1376501378        0        False   \n",
       "28   1376506638        0        False   \n",
       "29   1376504586        0        False   \n",
       "..          ...      ...          ...   \n",
       "274  1387725593        0        False   \n",
       "275  1387726426        0        False   \n",
       "276  1387728068        0        False   \n",
       "277  1387728077        0        False   \n",
       "278  1387729770        0        False   \n",
       "279  1387728089        0        False   \n",
       "280  1387727123        0        False   \n",
       "281  1387728500        0        False   \n",
       "282  1387729808        0        False   \n",
       "283  1387660057        0        False   \n",
       "284  1387727477        0        False   \n",
       "285  1387725100        0        False   \n",
       "286  1387731453        0        False   \n",
       "287  1387730991        0        False   \n",
       "288  1387723778        0        False   \n",
       "289  1387731647        0        False   \n",
       "290  1387733802        0        False   \n",
       "291  1387731776        0        False   \n",
       "292  1387729265        0        False   \n",
       "293  1387735526        0        False   \n",
       "294  1387713713        0        False   \n",
       "295  1387735341        0        False   \n",
       "296  1387731258        0        False   \n",
       "297  1387737095        0        False   \n",
       "298  1387737450        0        False   \n",
       "299  1387740537        0        False   \n",
       "300  1387742161        0        False   \n",
       "301  1387755659        0        False   \n",
       "302  1387735327        0        False   \n",
       "303  1387788528        0        False   \n",
       "\n",
       "                                              POLYLINE  \\\n",
       "0    [[-8.649891,41.154399],[-8.649981,41.154417],[...   \n",
       "1    [[-8.610876,41.145759],[-8.610849,41.145759],[...   \n",
       "2    [[-8.613243,41.166873],[-8.613252,41.166747],[...   \n",
       "3    [[-8.606988,41.15025],[-8.607213,41.150007],[-...   \n",
       "4    [[-8.628273,41.157405],[-8.628255,41.157423],[...   \n",
       "5    [[-8.605818,41.153391],[-8.607339,41.153427],[...   \n",
       "6    [[-8.611137,41.149332],[-8.611263,41.149161],[...   \n",
       "7    [[-8.586396,41.149224],[-8.586378,41.149026],[...   \n",
       "8    [[-8.625177,41.157333],[-8.625609,41.157405],[...   \n",
       "9    [[-8.584335,41.163111],[-8.585127,41.162922],[...   \n",
       "10   [[-8.609688,41.160348],[-8.609967,41.159277],[...   \n",
       "11   [[-8.624502,41.179554],[-8.624511,41.179527],[...   \n",
       "12   [[-8.638443,41.170797],[-8.6382,41.170716],[-8...   \n",
       "13   [[-8.628147,41.157198],[-8.628156,41.157198],[...   \n",
       "14   [[-8.627643,41.157765],[-8.627958,41.1579],[-8...   \n",
       "15   [[-8.604045,41.182569],[-8.604135,41.182353],[...   \n",
       "16   [[-8.613945,41.141277],[-8.613972,41.141286],[...   \n",
       "17   [[-8.615556,41.14071],[-8.615565,41.140692],[-...   \n",
       "18   [[-8.606979,41.150268],[-8.607285,41.150124],[...   \n",
       "19   [[-8.610804,41.145741],[-8.610822,41.145768],[...   \n",
       "20   [[-8.583165,41.164713],[-8.583012,41.164407],[...   \n",
       "21   [[-8.628345,41.15763],[-8.628345,41.157576],[-...   \n",
       "22   [[-8.600184,41.182686],[-8.600031,41.182758],[...   \n",
       "23   [[-8.609706,41.151276],[-8.609679,41.151294],[...   \n",
       "24   [[-8.598996,41.149026],[-8.598843,41.148873],[...   \n",
       "25   [[-8.618022,41.151519],[-8.618337,41.151447],[...   \n",
       "26   [[-8.63028,41.157432],[-8.630505,41.157153],[-...   \n",
       "27   [[-8.609499,41.151294],[-8.609535,41.151312],[...   \n",
       "28   [[-8.632323,41.164326],[-8.632917,41.164065],[...   \n",
       "29   [[-8.619921,41.148018],[-8.620218,41.147712],[...   \n",
       "..                                                 ...   \n",
       "274  [[-8.612145,41.172777],[-8.612568,41.172768],[...   \n",
       "275  [[-8.648964,41.179752],[-8.648982,41.179752],[...   \n",
       "276  [[-8.658126,41.154876],[-8.657829,41.154579],[...   \n",
       "277  [[-8.630316,41.15754],[-8.629668,41.157],[-8.6...   \n",
       "278  [[-8.6121,41.158674],[-8.6121,41.158674],[-8.6...   \n",
       "279  [[-8.580204,41.15934],[-8.580627,41.159241],[-...   \n",
       "280  [[-8.63991,41.15979],[-8.640693,41.159664],[-8...   \n",
       "281  [[-8.610885,41.14566],[-8.610885,41.145669],[-...   \n",
       "282  [[-8.584335,41.163156],[-8.584425,41.163102],[...   \n",
       "283  [[-8.610768,41.145642],[-8.610759,41.145642],[...   \n",
       "284  [[-8.613972,41.141349],[-8.613963,41.141349],[...   \n",
       "285  [[-8.676234,41.15484],[-8.676198,41.154822],[-...   \n",
       "286  [[-8.657946,41.148234],[-8.657937,41.148207],[...   \n",
       "287  [[-8.569818,41.170158],[-8.569278,41.169996],[...   \n",
       "288  [[-8.628867,41.160996],[-8.628849,41.160951],[...   \n",
       "289  [[-8.610759,41.145651],[-8.610768,41.145678],[...   \n",
       "290  [[-8.628786,41.161041],[-8.628579,41.160897],[...   \n",
       "291  [[-8.649423,41.154345],[-8.6499,41.154273],[-8...   \n",
       "292  [[-8.608779,41.147793],[-8.608734,41.147802],[...   \n",
       "293      [[-8.597673,41.142681],[-8.597682,41.142681]]   \n",
       "294  [[-8.594352,41.169375],[-8.594352,41.169375],[...   \n",
       "295  [[-8.630766,41.154948],[-8.631414,41.15439],[-...   \n",
       "296  [[-8.59698,41.171328],[-8.595054,41.172327],[-...   \n",
       "297  [[-8.591688,41.159556],[-8.591625,41.159421],[...   \n",
       "298  [[-8.61327,41.154453],[-8.613297,41.154147],[-...   \n",
       "299  [[-8.654796,41.173551],[-8.654526,41.173668],[...   \n",
       "300  [[-8.639487,41.167422],[-8.639424,41.16753],[-...   \n",
       "301  [[-8.679753,41.156559],[-8.679717,41.156568],[...   \n",
       "302  [[-8.608707,41.147811],[-8.608689,41.147829],[...   \n",
       "303  [[-8.609247,41.155182],[-8.60922,41.155254],[-...   \n",
       "\n",
       "                                              LATITUDE  \\\n",
       "0    [-0.0392686, -0.0390627, -0.0440035, -0.049458...   \n",
       "1    [-0.155839, -0.155839, -0.151619, -0.14673, -0...   \n",
       "2    [0.129025, 0.127327, 0.125474, 0.118835, 0.104...   \n",
       "3    [-0.0952637, -0.0985575, -0.112865, -0.113843,...   \n",
       "4    [0.00128665, 0.00149252, 0.00236744, 0.0135356...   \n",
       "5    [-0.0528556, -0.0523924, -0.0513116, -0.050694...   \n",
       "6    [-0.107667, -0.109931, -0.110086, -0.110086, -...   \n",
       "7    [-0.109108, -0.111784, -0.11199, -0.107873, -0...   \n",
       "8    [0.000308796, 0.00128665, 0.00494074, 0.006021...   \n",
       "9    [0.0782799, 0.0757066, 0.0835809, 0.0913522, 0...   \n",
       "10   [0.040967, 0.0265565, 0.00370556, 0.000669059,...   \n",
       "11   [0.300099, 0.299738, 0.299738, 0.299841, 0.299...   \n",
       "12   [0.181932, 0.180852, 0.184866, 0.192174, 0.200...   \n",
       "13   [-0.00149252, -0.00149252, -0.00128665, -0.001...   \n",
       "14   [0.00612446, 0.00797724, 0.0135356, 0.0206894,...   \n",
       "15   [0.340757, 0.337875, 0.316876, 0.295724, 0.278...   \n",
       "16   [-0.216312, -0.216209, -0.221047, -0.222642, -...   \n",
       "17   [-0.22398, -0.224186, -0.22434, -0.22362, -0.2...   \n",
       "18   [-0.0950063, -0.096962, -0.0962415, -0.0962415...   \n",
       "19   [-0.156097, -0.155736, -0.155839, -0.151722, -...   \n",
       "20   [0.0998956, 0.0957268, 0.0964474, 0.105557, 0....   \n",
       "21   [0.00432315, 0.00360262, 0.00504367, 0.0026247...   \n",
       "22   [0.342352, 0.34333, 0.33736, 0.335559, 0.33314...   \n",
       "23   [-0.0814193, -0.081162, -0.0792063, -0.0644355...   \n",
       "24   [-0.111784, -0.113843, -0.115284, -0.119195, -...   \n",
       "25   [-0.0781255, -0.0791033, -0.0844558, -0.087132...   \n",
       "26   [0.00164691, -0.00211011, -0.00452901, 0.01085...   \n",
       "27   [-0.081162, -0.0809046, -0.0778681, -0.0758095...   \n",
       "28   [0.0946461, 0.0911464, 0.0867718, 0.093205, 0....   \n",
       "29   [-0.125371, -0.129489, -0.1176, -0.104013, -0....   \n",
       "..                                                 ...   \n",
       "274  [0.208643, 0.20854, 0.216569, 0.226142, 0.2362...   \n",
       "275  [0.302775, 0.302775, 0.314663, 0.316104, 0.316...   \n",
       "276  [-0.0328353, -0.0368497, -0.0219245, -0.001286...   \n",
       "277  [0.00308796, -0.00416875, -0.00844043, -0.0272...   \n",
       "278  [0.0183734, 0.0183734, 0.00452901, -0.0170353,...   \n",
       "279  [0.0273799, 0.0260418, 0.015131, -0.005301, -0...   \n",
       "280  [0.0334529, 0.0317546, 0.00586713, -0.0161089,...   \n",
       "281  [-0.157177, -0.157074, -0.157435, -0.156714, -...   \n",
       "282  [0.078846, 0.0781255, 0.0776623, 0.0841985, 0....   \n",
       "283  [-0.157435, -0.157435, -0.157074, -0.156354, -...   \n",
       "284  [-0.215334, -0.215334, -0.216929, -0.205607, -...   \n",
       "285  [-0.03335, -0.0335559, -0.0335559, -0.0334529,...   \n",
       "286  [-0.122438, -0.122798, -0.122798, -0.122695, -...   \n",
       "287  [0.173338, 0.171125, 0.158052, 0.160934, 0.166...   \n",
       "288  [0.0497162, 0.0490986, 0.0543481, 0.0718466, 0...   \n",
       "289  [-0.15728, -0.15692, -0.155839, -0.154244, -0....   \n",
       "290  [0.0503338, 0.0483781, 0.0476576, 0.0474002, 0...   \n",
       "291  [-0.0399891, -0.040967, -0.0452387, -0.0437976...   \n",
       "292  [-0.128408, -0.128305, -0.128305, -0.128408, -...   \n",
       "293                             [-0.197372, -0.197372]   \n",
       "294  [0.162787, 0.162787, 0.16289, 0.162993, 0.1631...   \n",
       "295  [-0.0318575, -0.039423, -0.054554, -0.0752434,...   \n",
       "296  [0.189138, 0.20257, 0.253367, 0.308848, 0.3575...   \n",
       "297  [0.0303135, 0.0284607, 0.0216672, 0.0165721, 0...   \n",
       "298  [-0.0385481, -0.0426654, -0.0465768, -0.047657...   \n",
       "299  [0.219091, 0.220686, 0.236486, 0.239369, 0.233...   \n",
       "300  [0.136436, 0.137878, 0.135819, 0.12393, 0.1178...   \n",
       "301  [-0.0101388, -0.0100359, -0.00308796, -0.00710...   \n",
       "302  [-0.12815, -0.127945, -0.128665, -0.13304, -0....   \n",
       "303  [-0.0287181, -0.0277402, -0.0210496, -0.021409...   \n",
       "\n",
       "                                             LONGITUDE               TARGET  \\\n",
       "0    [-0.590024, -0.591592, -0.596627, -0.596793, -...  [-8.61043, 41.1411]   \n",
       "1    [0.0920491, 0.0925159, 0.0985014, 0.105587, 0....  [-8.63072, 41.1547]   \n",
       "2    [0.0506678, 0.0505178, 0.0497175, 0.0700247, 0...  [-8.61534, 41.1407]   \n",
       "3    [0.160023, 0.156088, 0.148386, 0.145868, 0.144...  [-8.55426, 41.1628]   \n",
       "4    [-0.212091, -0.211775, -0.209724, -0.20894, -0...  [-8.61928, 41.1786]   \n",
       "5    [0.18048, 0.153888, 0.112506, 0.0797781, 0.071...  [-8.64643, 41.1616]   \n",
       "6    [0.0874808, 0.08528, 0.0849633, 0.0848132, 0.0...  [-8.61446, 41.1422]   \n",
       "7    [0.520016, 0.520333, 0.513247, 0.49249, 0.4643...  [-8.58591, 41.1486]   \n",
       "8    [-0.157972, -0.165525, -0.194935, -0.202171, -...  [-8.64726, 41.1732]   \n",
       "9    [0.556046, 0.542208, 0.51058, 0.479736, 0.4769...  [-8.58525, 41.1689]   \n",
       "10   [0.112823, 0.107938, 0.107938, 0.107471, 0.106...  [-8.61071, 41.1456]   \n",
       "11   [-0.146168, -0.146318, -0.146485, -0.146318, -...  [-8.62455, 41.1796]   \n",
       "12   [-0.389887, -0.385636, -0.36046, -0.330883, -0...   [-8.6206, 41.1739]   \n",
       "13   [-0.209891, -0.210041, -0.20879, -0.208473, -0...  [-8.61782, 41.1525]   \n",
       "14   [-0.201071, -0.206589, -0.20879, -0.228147, -0...  [-8.61148, 41.1461]   \n",
       "15   [0.211474, 0.209907, 0.197003, 0.183148, 0.161...  [-8.62064, 41.1643]   \n",
       "16   [0.0383969, 0.03793, 0.0220411, 0.0168393, 0.0...  [-8.63607, 41.1592]   \n",
       "17   [0.0102369, 0.0100702, 0.0100702, 0.010387, 0....  [-8.64072, 41.1612]   \n",
       "18   [0.160173, 0.154838, 0.148852, 0.148536, 0.128...  [-8.61805, 41.1525]   \n",
       "19   [0.0933162, 0.0929994, 0.0917323, 0.0961339, 0...  [-8.60417, 41.1489]   \n",
       "20   [0.576503, 0.579187, 0.580438, 0.580121, 0.597...   [-8.6118, 41.1429]   \n",
       "21   [-0.213342, -0.213342, -0.206906, -0.178896, -...  [-8.61802, 41.1501]   \n",
       "22   [0.278965, 0.281649, 0.310276, 0.319096, 0.341...  [-8.56627, 41.1814]   \n",
       "23   [0.112506, 0.112973, 0.115491, 0.107788, 0.108...  [-8.61818, 41.1696]   \n",
       "24   [0.299739, 0.302423, 0.303357, 0.305408, 0.304...  [-8.60023, 41.1493]   \n",
       "25   [-0.0328782, -0.0383802, -0.0624553, -0.079444...  [-8.59822, 41.1484]   \n",
       "26   [-0.24717, -0.251105, -0.271246, -0.289819, -0...  [-8.65056, 41.1615]   \n",
       "27   [0.116124, 0.115491, 0.117375, 0.111556, 0.100...   [-8.61674, 41.137]   \n",
       "28   [-0.2829, -0.293287, -0.317345, -0.346305, -0....   [-8.65428, 41.181]   \n",
       "29   [-0.0660733, -0.0712751, -0.0792946, -0.084179...  [-8.61061, 41.1515]   \n",
       "..                                                 ...                  ...   \n",
       "274  [0.0698579, 0.062472, 0.0542858, 0.0513014, 0....  [-8.58568, 41.1489]   \n",
       "275  [-0.573819, -0.574136, -0.57807, -0.576336, -0...  [-8.63323, 41.1756]   \n",
       "276  [-0.733992, -0.728807, -0.743912, -0.769238, -...  [-8.65425, 41.1809]   \n",
       "277  [-0.247804, -0.236483, -0.233182, -0.24497, -0...   [-8.6304, 41.1554]   \n",
       "278  [0.0706582, 0.0706582, 0.064356, 0.0511347, 0....   [-8.62106, 41.151]   \n",
       "279  [0.628271, 0.620869, 0.633156, 0.637241, 0.637...  [-8.58601, 41.1486]   \n",
       "280  [-0.41553, -0.429218, -0.438821, -0.447324, -0...   [-8.6178, 41.1471]   \n",
       "281  [0.091899, 0.091899, 0.0917323, 0.0923659, 0.0...  [-8.66138, 41.1481]   \n",
       "282  [0.556046, 0.554479, 0.533088, 0.506012, 0.478...     [-8.6117, 41.16]   \n",
       "283  [0.0939331, 0.0940998, 0.0942499, 0.0939331, 0...  [-8.63085, 41.1466]   \n",
       "284  [0.03793, 0.0380801, 0.029277, 0.0308442, 0.03...  [-8.61403, 41.1499]   \n",
       "285  [-1.05057, -1.04994, -1.04994, -1.04962, -1.04...   [-8.6488, 41.1486]   \n",
       "286  [-0.730841, -0.730691, -0.730691, -0.730541, -...  [-8.65648, 41.1532]   \n",
       "287  [0.809852, 0.819288, 0.845881, 0.854534, 0.858...    [-8.572, 41.1629]   \n",
       "288  [-0.222478, -0.222162, -0.209724, -0.202021, -...  [-8.71435, 41.2082]   \n",
       "289  [0.0940998, 0.0939331, 0.0936163, 0.0936163, 0...  [-8.63835, 41.1592]   \n",
       "290  [-0.221061, -0.217443, -0.21776, -0.21791, -0....    [-8.596, 41.1696]   \n",
       "291  [-0.581838, -0.590191, -0.59616, -0.579487, -0...  [-8.57125, 41.1646]   \n",
       "292  [0.128712, 0.129496, 0.129812, 0.133114, 0.133...  [-8.62051, 41.1651]   \n",
       "293                               [0.322864, 0.322714]  [-8.59768, 41.1427]   \n",
       "294  [0.380934, 0.380934, 0.381084, 0.381084, 0.381...  [-8.58298, 41.1704]   \n",
       "295  [-0.255673, -0.267011, -0.283683, -0.29422, -0...  [-8.63564, 41.1406]   \n",
       "296  [0.334985, 0.368663, 0.395873, 0.406426, 0.397...  [-8.33168, 41.2035]   \n",
       "297  [0.427501, 0.428601, 0.428134, 0.413496, 0.402...  [-8.60578, 41.1498]   \n",
       "298  [0.050201, 0.0497175, 0.0495675, 0.0564866, 0....  [-8.58762, 41.1885]   \n",
       "299  [-0.675771, -0.671053, -0.652646, -0.632039, -...  [-8.63023, 41.1584]   \n",
       "300  [-0.408144, -0.407043, -0.402008, -0.397757, -...  [-8.66577, 41.2102]   \n",
       "301  [-1.11209, -1.11146, -1.0954, -1.07763, -1.058...  [-8.61165, 41.1461]   \n",
       "302  [0.129962, 0.130279, 0.13328, 0.129812, 0.1073...  [-8.62782, 41.1698]   \n",
       "303  [0.120526, 0.121009, 0.117541, 0.108105, 0.106...   [-8.61635, 41.163]   \n",
       "\n",
       "                                        COORD_FEATURES DAY_OF_WEEK  \\\n",
       "0    [-0.590024, -0.591592, -0.596627, -0.596793, -...           2   \n",
       "1    [0.0920491, 0.0925159, 0.0985014, 0.105587, 0....           2   \n",
       "2    [0.0506678, 0.0505178, 0.0497175, 0.0700247, 0...           2   \n",
       "3    [0.160023, 0.156088, 0.148386, 0.145868, 0.144...           2   \n",
       "4    [-0.212091, -0.211775, -0.209724, -0.20894, -0...           2   \n",
       "5    [0.18048, 0.153888, 0.112506, 0.0797781, 0.071...           2   \n",
       "6    [0.0874808, 0.08528, 0.0849633, 0.0848132, 0.0...           2   \n",
       "7    [0.520016, 0.520333, 0.513247, 0.49249, 0.4643...           2   \n",
       "8    [-0.157972, -0.165525, -0.194935, -0.202171, -...           2   \n",
       "9    [0.556046, 0.542208, 0.51058, 0.479736, 0.4769...           2   \n",
       "10   [0.112823, 0.107938, 0.107938, 0.107471, 0.106...           2   \n",
       "11   [-0.146168, -0.146318, -0.146485, -0.146318, -...           2   \n",
       "12   [-0.389887, -0.385636, -0.36046, -0.330883, -0...           2   \n",
       "13   [-0.209891, -0.210041, -0.20879, -0.208473, -0...           2   \n",
       "14   [-0.201071, -0.206589, -0.20879, -0.228147, -0...           2   \n",
       "15   [0.211474, 0.209907, 0.197003, 0.183148, 0.161...           2   \n",
       "16   [0.0383969, 0.03793, 0.0220411, 0.0168393, 0.0...           2   \n",
       "17   [0.0102369, 0.0100702, 0.0100702, 0.010387, 0....           2   \n",
       "18   [0.160173, 0.154838, 0.148852, 0.148536, 0.128...           2   \n",
       "19   [0.0933162, 0.0929994, 0.0917323, 0.0961339, 0...           2   \n",
       "20   [0.576503, 0.579187, 0.580438, 0.580121, 0.597...           2   \n",
       "21   [-0.213342, -0.213342, -0.206906, -0.178896, -...           2   \n",
       "22   [0.278965, 0.281649, 0.310276, 0.319096, 0.341...           2   \n",
       "23   [0.112506, 0.112973, 0.115491, 0.107788, 0.108...           2   \n",
       "24   [0.299739, 0.302423, 0.303357, 0.305408, 0.304...           2   \n",
       "25   [-0.0328782, -0.0383802, -0.0624553, -0.079444...           2   \n",
       "26   [-0.24717, -0.251105, -0.271246, -0.289819, -0...           2   \n",
       "27   [0.116124, 0.115491, 0.117375, 0.111556, 0.100...           2   \n",
       "28   [-0.2829, -0.293287, -0.317345, -0.346305, -0....           2   \n",
       "29   [-0.0660733, -0.0712751, -0.0792946, -0.084179...           2   \n",
       "..                                                 ...         ...   \n",
       "274  [0.0698579, 0.062472, 0.0542858, 0.0513014, 0....           6   \n",
       "275  [-0.573819, -0.574136, -0.57807, -0.576336, -0...           6   \n",
       "276  [-0.733992, -0.728807, -0.743912, -0.769238, -...           6   \n",
       "277  [-0.247804, -0.236483, -0.233182, -0.24497, -0...           6   \n",
       "278  [0.0706582, 0.0706582, 0.064356, 0.0511347, 0....           6   \n",
       "279  [0.628271, 0.620869, 0.633156, 0.637241, 0.637...           6   \n",
       "280  [-0.41553, -0.429218, -0.438821, -0.447324, -0...           6   \n",
       "281  [0.091899, 0.091899, 0.0917323, 0.0923659, 0.0...           6   \n",
       "282  [0.556046, 0.554479, 0.533088, 0.506012, 0.478...           6   \n",
       "283  [0.0939331, 0.0940998, 0.0942499, 0.0939331, 0...           5   \n",
       "284  [0.03793, 0.0380801, 0.029277, 0.0308442, 0.03...           6   \n",
       "285  [-1.05057, -1.04994, -1.04994, -1.04962, -1.04...           6   \n",
       "286  [-0.730841, -0.730691, -0.730691, -0.730541, -...           6   \n",
       "287  [0.809852, 0.819288, 0.845881, 0.854534, 0.858...           6   \n",
       "288  [-0.222478, -0.222162, -0.209724, -0.202021, -...           6   \n",
       "289  [0.0940998, 0.0939331, 0.0936163, 0.0936163, 0...           6   \n",
       "290  [-0.221061, -0.217443, -0.21776, -0.21791, -0....           6   \n",
       "291  [-0.581838, -0.590191, -0.59616, -0.579487, -0...           6   \n",
       "292  [0.128712, 0.129496, 0.129812, 0.133114, 0.133...           6   \n",
       "293  [0.322864, 0.322864, 0.322864, 0.322864, 0.322...           6   \n",
       "294  [0.380934, 0.380934, 0.381084, 0.381084, 0.381...           6   \n",
       "295  [-0.255673, -0.267011, -0.283683, -0.29422, -0...           6   \n",
       "296  [0.334985, 0.368663, 0.395873, 0.406426, 0.397...           6   \n",
       "297  [0.427501, 0.428601, 0.428134, 0.413496, 0.402...           6   \n",
       "298  [0.050201, 0.0497175, 0.0495675, 0.0564866, 0....           6   \n",
       "299  [-0.675771, -0.671053, -0.652646, -0.632039, -...           6   \n",
       "300  [-0.408144, -0.407043, -0.402008, -0.397757, -...           6   \n",
       "301  [-1.11209, -1.11146, -1.0954, -1.07763, -1.058...           6   \n",
       "302  [0.129962, 0.130279, 0.13328, 0.129812, 0.1073...           6   \n",
       "303  [0.120526, 0.121009, 0.117541, 0.108105, 0.106...           0   \n",
       "\n",
       "    QUARTER_HOUR WEEK_OF_YEAR  \n",
       "0             40           33  \n",
       "1             40           33  \n",
       "2             41           33  \n",
       "3             42           33  \n",
       "4             41           33  \n",
       "5             42           33  \n",
       "6             40           33  \n",
       "7             40           33  \n",
       "8             40           33  \n",
       "9             44           33  \n",
       "10            44           33  \n",
       "11            45           33  \n",
       "12            43           33  \n",
       "13            41           33  \n",
       "14            42           33  \n",
       "15            37           33  \n",
       "16            42           33  \n",
       "17            44           33  \n",
       "18            45           33  \n",
       "19            47           33  \n",
       "20            46           33  \n",
       "21            46           33  \n",
       "22            48           33  \n",
       "23            44           33  \n",
       "24            41           33  \n",
       "25            45           33  \n",
       "26            48           33  \n",
       "27            41           33  \n",
       "28            47           33  \n",
       "29            45           33  \n",
       "..           ...          ...  \n",
       "274           29           51  \n",
       "275           30           51  \n",
       "276           32           51  \n",
       "277           32           51  \n",
       "278           33           51  \n",
       "279           32           51  \n",
       "280           31           51  \n",
       "281           32           51  \n",
       "282           34           51  \n",
       "283           52           51  \n",
       "284           31           51  \n",
       "285           28           51  \n",
       "286           35           51  \n",
       "287           35           51  \n",
       "288           27           51  \n",
       "289           36           51  \n",
       "290           38           51  \n",
       "291           36           51  \n",
       "292           33           51  \n",
       "293           40           51  \n",
       "294           16           51  \n",
       "295           40           51  \n",
       "296           35           51  \n",
       "297           42           51  \n",
       "298           42           51  \n",
       "299           45           51  \n",
       "300           47           51  \n",
       "301           62           51  \n",
       "302           40           51  \n",
       "303            3           52  \n",
       "\n",
       "[304 rows x 16 columns]"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_val"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
